In this Rmarkdown report, I present the code and the analyses for the population structure and diversity statistics of a world-wide whole genome sampling of Zymoseptoria tritici. The figures are the ones used in the manuscript and the code here complements the other scripts available with the manuscripts.

library(knitr)
library(reticulate)

#Spatial analyses packages
library(raster)
library(sp)
library(rgdal)
library(maps)
library(geosphere)
library(geodist)
library(ggExtra)

#Data wrangling and data viz
library(purrr)
library(RColorBrewer)
library(plotly)
library(cowplot)
library(GGally)
library(corrplot)
library(pheatmap)
library(ggstance)
library('pophelper')
library(ggbiplot)
library(igraph)
library(ggraph)
library(ggtext)
library(scatterpie)
library(tidyverse)
library(ggExtra)
library(ggpubr)

#Pop structure and pop genomic
library(GenomicFeatures)
library(SNPRelate) #PCA
library(LEA) #Clustering
library(pophelper)
#library(PopGenome) #Summary statistics
library(gridExtra)
library(ggExtra)
library(multcomp)
library(lsmeans)

#GEA
library(lfmm)

#Statistics
library(car)
library(corrr)
library(lsmeans)
library(multcomp)

library(caret)
library(mgcv)

#Variables
world <- map_data("world")
project_dir="~/Documents/Postdoc_Bruce/Projects/WW_project/"
lists_dir = "~/Documents/Postdoc_Bruce/Projects/WW_project/WW_PopGen/Keep_lists_samples/"

#Data directories
data_dir=paste0(project_dir, "0_Data/")
metadata_dir=paste0(project_dir, "Metadata/")
fig_dir = "~/Documents/Postdoc_Bruce/Manuscripts/Feurtey_WW_Zt/Draft_figures/"

#Analysis directories
#-___________________
VAR_dir = paste0(project_dir, "1_Variant_calling/")
  depth_per_window_dir = paste0(VAR_dir, "1_Depth_per_window/")
  vcf_dir = paste0(VAR_dir, "4_Joint_calling/")
  mito_SV = paste0(VAR_dir, "6_Mito_SV/")
PopStr_dir = paste0(project_dir, "2_Population_structure/")
  nuc_PS_dir=paste0(PopStr_dir, "0_Nuclear_genome/")
  mito_PS_dir = paste0(PopStr_dir, "1_Mitochondrial_genome/")
Sumstats_dir = paste0(project_dir, "3_Sumstats_demography/")
TE_RIP_dir=paste0(project_dir, "4_TE_RIP/")
   RIP_DIR=paste0(TE_RIP_dir, "0_RIP_estimation/")
   DIM2_DIR=paste0(TE_RIP_dir, "1_Blast_from_denovo_assemblies/")
GEA_dir=paste0(project_dir, "5_GEA/")
fung_dir=paste0(project_dir, "6_Fungicide_resistance/")
virulence_dir = paste0(project_dir, "7_Virulence/")
sel_dir = paste0(project_dir, "8_Selection/")
  gene_list_dir = paste0(sel_dir, "0_Lists_unique_copy/")

#Files
vcf_name="Ztritici_global_March2021.genotyped.ALL.filtered.clean.AB_filtered.variants.good_samples.biall_SNP.max-m-1.maf-0.05.thin-1000bp"
vcf_name_nomaf="Ztritici_global_March2021.filtered-clean.AB_filtered.SNP.max-m-0.8.thin-1000bp"
vcf_name_mito = "Ztritici_global_March2021.genotyped.mt.filtered.clean.AB_filtered.variants.good_samples.max-m-80"
Zt_list = paste0(lists_dir, "Ztritici_global_March2021.genotyped.good_samples.args")
gff_file = paste0(data_dir, "Zymoseptoria_tritici.MG2.Grandaubert2015.no_CDS.gff3")
ref_fasta_file = paste0(data_dir, "Zymoseptoria_tritici.MG2.dna.toplevel.mt+.fa")
metadata_name = "Main_table_from_SQL_Feb_2020"
gene_annotation = read_tsv(paste0(data_dir, "Badet_GLOBAL_PANGENOME_TABLE.txt"))
complete_mito = read_tsv(paste0(data_dir, "Complete_mitochondria_from_blast.txt"), col_names = c("ID_file", "Contig"))

Sys.setenv(PROJECTDIR=project_dir)
Sys.setenv(VARDIR=VAR_dir)
Sys.setenv(VCFDIR=vcf_dir)
Sys.setenv(POPSTR=PopStr_dir)
Sys.setenv(MITOPOPSTR=mito_PS_dir)

Sys.setenv(SUMST=Sumstats_dir)
Sys.setenv(GEADIR=GEA_dir)

Sys.setenv(ZTLIST=Zt_list)
Sys.setenv(GFFFILE = gff_file)
Sys.setenv(REFFILE = ref_fasta_file)
Sys.setenv(VCFNAME=vcf_name)
Sys.setenv(VCFNAME_NOMAF=vcf_name_nomaf)
Sys.setenv(VCFNAME_MITO=vcf_name_mito)

#knitr::opts_chunk$set(echo = F)
knitr::opts_chunk$set(message = F)
knitr::opts_chunk$set(warning = F)
knitr::opts_chunk$set(results = T)
knitr::opts_chunk$set(dev=c('png', 'pdf'))


# Metadata and sample lists
##Filtered_samples
filtered_samples = bind_rows(
  read_tsv(paste0(metadata_dir, "Sample_removed_based_on_IBS.args"), col_names = "ID_file") %>%
  mutate(Filter = "IBS"),
read_tsv(paste0(metadata_dir, "Sample_with_too_much_NA.args"), col_names = "ID_file") %>%
  mutate(Filter = "High_NA"),
read_tsv(paste0(metadata_dir, "Samples_to_filter_out.args"), col_names = "ID_file") %>%
  mutate(Filter = "Mutants_etc"))

##Samples in vcf
genotyped_samples = read_tsv(Zt_list, col_names = "ID_file")

## Metadata of genotyped samples 
temp = read_tsv(paste0(metadata_dir, metadata_name, "_Description.tab"), col_names = F) %>% pull()

Zt_meta = read_delim(paste0(metadata_dir, metadata_name, "_with_collection.tab"), 
                 col_names = temp,
                 na = "\\N", guess_max = 2000) %>%
  unite(Coordinates, Latitude, Longitude, sep = ";", remove = F) %>%
  inner_join(., genotyped_samples)  %>%
  mutate(Country = ifelse(Country == "USA", paste(Country, Region, sep = "_"), Country)) %>%
  mutate(Country = ifelse(Country == "Australia", paste(Country, Region, sep = "_"), Country)) %>%
  mutate(Country = ifelse(Country == "NZ", "New Zealand", Country)) %>%
  mutate(Country = ifelse(Country == "CH", "Switzerland", Country)) %>%
  mutate(Latitude2 = round(Latitude, 2), Longitude2 = round(Longitude, 2)) %>%
  dplyr::select(ID_file, Sampling_Date, Collection,
                Country, Continent, Latitude, Longitude, Latitude2, Longitude2)

#genotyped_samples %>%
#  filter(!(ID_file %in% filtered_samples$ID_file)) %>%
#    write_tsv(Zt_list, col_names = F)



#Define colors
## For continents
#myColors <- c("#04078B", "#a10208", "#FFBA08", "#CC0044", "#5C9D06", "#129EBA","#305D1B")
myColors <- c("#DA4167", "grey", "#ffba0a", "#A20106", "#3F6E0C", "#129eba", "#8fa253" )
names(myColors) = levels(factor(Zt_meta$Continent))
Color_Continent = ggplot2::scale_colour_manual(name = "Continent", values = myColors)
Fill_Continent = ggplot2::scale_fill_manual(name = "Continent", values = myColors)

## For clustering
K_colors = c("#f9c74f", "#f9844a", "#90be6d", "#f5cac3", 
"#83c5be", "#f28482", "#577590", "#e5e5e5", "#a09abc",  "#52796f",
"#219ebc", "#003049", "#82c0cc", "#283618", "white")


## For correlations
mycolorsCorrel<- colorRampPalette(c("#0f8b8d", "white", "#a8201a"))(20)
Zt_meta
#Random gradients
greens=c("#1B512D", "#669046", "#8CB053", "#B1CF5F", "#514F59")
blues=c("#08386E", "#1C89C9", "#28A7C0", "#B0DFE8", "grey")

Sampling: time, geography, and mating types


Geography

The sampling of Z.tritici isolated from the natural environment covers almost the entirety of the wheat-grown continents. It is, however, highly heterogeneous. Europe has the highest sampling density. Several locations are heavily sampled, such a fields in Switzerland or the US.

#kable(Zt_meta %>% dplyr::count(Collection, name = "Number of genomes"))
Zt_meta %>% dplyr::count(Collection, name = "Number of genomes")
## # A tibble: 18 × 2
##    Collection             `Number of genomes`
##    <chr>                                <int>
##  1 BIOGER_Thierry                          17
##  2 Eschikon_2017                           94
##  3 ETHZ_2020                              128
##  4 Fillinger                                2
##  5 GWASpanel_BIOGER                        90
##  6 Hartmann_FstQst_2015                   132
##  7 Hartmann_Oregon_2016                    94
##  8 JGI                                     43
##  9 JGI_1                                    1
## 10 JGI_2                                   32
## 11 JGI_3                                   20
## 12 JGI_4                                    3
## 13 JGI_Thierry                             43
## 14 Megan_McDonald                          13
## 15 MMcDonald_2018                          92
## 16 Syngenta                               283
## 17 Third_batch_2018_BM_TM                  16
## 18 <NA>                                     6
max_circle = max(Zt_meta %>%
  dplyr::count(Latitude, Longitude, name = "Number_genomes") %>%
  dplyr::select(Number_genomes))

temp = Zt_meta %>%
   dplyr::count(Country, Latitude, Longitude, name = "Number_genomes") %>%
   filter(Number_genomes > 0)

empty_map = ggplot() + theme_void() +
  geom_polygon(data = world, aes(x=long, y = lat, group = group), fill="#ede7e3", alpha=0.7)

p1 = empty_map +
  geom_point(data = temp, 
             aes(x=as.numeric(Longitude), y=as.numeric(Latitude),size=Number_genomes, text = Country),
             alpha = 0.6, color = "#ff9f1c") +
  scale_size("Number of genomes", limits = c(1, max_circle)) + 
  coord_cartesian(xlim=c(-20, 60), ylim=c(20, 70)) +
  theme(legend.position = "none")
p2 = empty_map +
  geom_point(data = temp, 
             aes(x=as.numeric(Longitude), y=as.numeric(Latitude),size=Number_genomes, text = Country),
             alpha = 0.6, color = "#ff9f1c") +
  scale_size("Number of genomes", limits = c(1, max_circle)) + coord_cartesian(xlim=c(-160, -40), ylim=c(-80, 80)) +
  theme(legend.position = "none")
p3 = empty_map +
  geom_point(data = temp, 
             aes(x=as.numeric(Longitude), y=as.numeric(Latitude),size=Number_genomes, text = Country),
             alpha = 0.6, color = "#ff9f1c") +
  scale_size("Number of genomes", limits = c(1, max_circle)) + coord_cartesian(xlim=c(115, 175), ylim=c(-60, 10))  


#Plotting all the maps together! 
aus_map = cowplot::plot_grid(p3+ theme(legend.position = "none"), get_legend(p3), ncol = 2, rel_widths = c(1, 0.9))
ligne = cowplot::plot_grid(p1, aus_map, ncol = 1,  rel_heights = c(1, 0.7))
cowplot::plot_grid(p2, ligne, ncol = 2, rel_widths = c(0.7, 1)) 

In the analyses, I will often use countries as a geographical unit, with the exception of the United States, which will instead be divided in states.

countries = dplyr::count(Zt_meta, Country, Continent)

countries %>% filter(n >= 8) %>%
  ggplot(aes(x = Country, y =n, fill = Continent)) +
    geom_bar(stat = "identity") +
    Fill_Continent +
    theme_cowplot() +
    labs(x = element_blank(), y = "Number of samples") +
    theme(axis.text.x = element_text(angle = 40, hjust = 1, vjust = 1))

for (country in filter(countries, n >= 8) %>% pull(Country)) {
  country_name = str_replace(country, pattern = " ", replacement = "_")
  file_name = paste0(PopStr_dir, "Sample_list_", country_name, ".args")
  
  Zt_meta %>% filter(Country == country) %>%
    dplyr::select(ID_file) %>%
    write_tsv(file = file_name, col_names = F)
}

In some cases, countries are too high level, so I subdivide them into geoclusters, centered on rounded coordinates. These geoclusters have a lower limit in terms of sample number. TODO: remove the limit that I never use. There is little need for both.

for ( min_number in c(6, 10) ) {
  
  small_pops = Zt_meta %>%
    filter(!is.na(Country) & !is.na(Sampling_Date)) %>%
    dplyr::count(Country, Latitude2, Longitude2, Sampling_Date, name = "Number_genomes") %>%
    filter( Number_genomes >= min_number ) 
  
  map1 = ggplot() + 
    theme_void() +
    geom_polygon(data = map_data("world"), aes(x=long, y = lat, group = group), fill="#ede7e3")  +
    scale_size("Number of genomes", limits = c(1, max_circle))  +
    theme(legend.position = "None", 
          panel.border  = element_rect(fill=NA, colour = "grey",size = 0.5, linetype = 1))+
      scale_fill_manual(values = c("grey", K_colors) )
  
  p1 = map1 + coord_cartesian(xlim=c(-20, 60), ylim=c(20, 70)) + 
    geom_point(data = small_pops, 
               mapping = aes(x=Longitude2, y=Latitude2, size=Number_genomes),
               alpha = 0.6, color = "#2ec4b6")
  
  p2 = map1 + coord_cartesian(xlim=c(-160, -40), ylim=c(-80, 80)) + 
    geom_point(data = small_pops, 
               mapping = aes(x=Longitude2, y=Latitude2, size=Number_genomes),
               alpha = 0.6, color = "#2ec4b6")
  
  p3 = map1 + coord_cartesian(xlim=c(115, 175), ylim=c(-65, 10)) + 
    geom_point(data = small_pops, 
               mapping = aes(x=Longitude2, y=Latitude2, size=Number_genomes),
               alpha = 0.6, color = "#2ec4b6")
  
  aus_map = cowplot::plot_grid(p3, get_legend(p3), ncol = 2, rel_widths = c(1, 0.9))
  ligne = cowplot::plot_grid(p1, aus_map, ncol = 1,  rel_heights = c(1, 0.7))
  cowplot::plot_grid(p2, ligne, ncol = 2, rel_widths = c(0.7, 1)) 
  
  
  
  for (t in 1:nrow(small_pops)) {
    country = pull(small_pops[t, "Country"])
    year = pull(small_pops[t, "Sampling_Date"])
    lat = pull(small_pops[t, "Latitude2"])
    long = pull(small_pops[t, "Longitude2"])
    country_name = str_replace(country, pattern = " ", replacement = "_")
    
    for (i in 1:10 ){
      
      file_name = paste0(PopStr_dir, "Sample_list_subset_", 
                         paste(min_number, country_name, year, lat, long, i, sep = "_"), 
                         ".args")
      
      temp = Zt_meta %>% 
        filter(Country == country) %>%
        filter(Sampling_Date == year)  %>% 
        filter(Latitude2 == lat) %>%
        filter(Longitude2 == long) %>%
        dplyr::select(ID_file) %>%
        pull()
      
      write_tsv(as.tibble(sample(temp, size = min_number)), file = file_name, col_names = F) 
    }
  }
}


Time

The sampling also covers a wide range of years: starting from 1990 to 2017. Just as with the geographical repartition, some years are heavily sampled, reflecting sampling in specific fields done for previous experiments.

temp = as_tibble(c(min(Zt_meta$Sampling_Date, na.rm = T) : max(Zt_meta$Sampling_Date, na.rm = T))) %>%
  mutate(`Sampling year` = as.character(value))

sum_temp = Zt_meta %>%
    mutate(`Sampling year` = as.character(Sampling_Date)) %>%
    dplyr::count(`Sampling year`, Continent) %>%
    full_join(., temp) %>%
    mutate(`Genome number` = replace_na(n, 0))

sum_temp %>%
ggplot(aes(x=`Sampling year`, y =`Genome number`, fill = Continent)) +
  geom_bar(stat = "identity") +
  theme_bw() + theme(axis.title = element_blank(),
                     axis.text.x = element_text(angle = 60, hjust = 1)) +
  Fill_Continent


Mating type

I have blasted the known sequences of the two mating type genes against each of the assemblies and will now gather all of this data in a summary table.

mat1_c=0
mat2_c=0
while read sample ; 
do 
   if [ -f /data2/alice/WW_project/2_Population_structure/Mat1_1_${sample}.blast_out.tsv ] ; 
     then 
        mat1="OK" ; 
        mat1_c=$(($mat1_c + 1))
     else
        mat1="NO" ; 
    fi ; 
    
    if [ -f "/data2/alice/WW_project/2_Population_structure/Mat1_2_${sample}.blast_out.tsv" ] ; 
      then 
        mat2="OK"; 
        mat2_c=$(($mat2_c + 1))
     else
        mat2="NO" ; 
    fi ;  
    echo $sample $mat1 $mat2 ; 
done < Keep_lists_samples/Ztritici_global_March2021.genotyped.good_samples.args

echo $mat1_c $mat2_c
mat_genes_blast = read_delim(paste0(PopStr_dir, "Mat1_blast_all.tsv"), delim = " ",
           col_names = c("ID_file", "gene", "qseqid", "sseqid", "pident", "length",
                                              "mismatch", "gapopen", "qstart", "qend",
                                              "sstart", "send", "evalue", "bitscore")) %>%
  distinct() %>%
  dplyr::inner_join(Zt_meta) %>%
  group_by(ID_file, gene, Continent, Country, Latitude2, Longitude2) %>%
  dplyr::mutate(Cum_length = sum(length))%>% ungroup()

mat_genes = ungroup(mat_genes_blast) %>%
  dplyr::group_by(ID_file, gene, Continent, Country, Latitude2, Longitude2) %>%
  dplyr::summarize(Cum_length = sum(length)) %>% ungroup()


dplyr::count(mat_genes, gene, Cum_length) 
## # A tibble: 23 × 3
##    gene    Cum_length     n
##    <chr>        <dbl> <int>
##  1 Mat_1_1        182     2
##  2 Mat_1_1        801     1
##  3 Mat_1_1        859     1
##  4 Mat_1_1        886     1
##  5 Mat_1_1        893     2
##  6 Mat_1_1        895     1
##  7 Mat_1_1        898   517
##  8 Mat_1_1        899     5
##  9 Mat_1_1        900     1
## 10 Mat_1_1        904     1
## # … with 13 more rows
mat_genes %>%
  ggplot(aes(Cum_length)) + 
    geom_density() + 
    theme_bw() + 
    facet_wrap(vars(gene)) +
    labs(x = "Cumulative length of blast matches", y = "Density",
         title = "Sanity check for the blast search using the match length")

mat11_size = median(filter(mat_genes, gene == "Mat_1_1")$Cum_length)
mat12_size = median(filter(mat_genes, gene == "Mat_1_2")$Cum_length)

mat_genes_blast = mat_genes_blast %>%
  mutate(Filtering = case_when(gene == "Mat_1_1" & Cum_length >= mat11_size - 20 & Cum_length <= mat11_size + 20 ~ "Keep",
                               gene == "Mat_1_2" & Cum_length >= mat12_size - 20 & Cum_length <= mat12_size + 20 ~ "Keep",
                               TRUE ~ "Filter_out"))


# Count and plot per country
temp = mat_genes_blast %>% 
  dplyr::select(ID_file, gene, Continent, Country) %>%
  distinct() %>%
  dplyr::count(Continent, Country, gene) %>%
  filter(!is.na(Continent))

dplyr::filter(temp, !is.na(Country)) %>%
  dplyr::group_by(Continent, Country) %>%
  dplyr::mutate(Total = sum(n)) %>%
  filter(Total > 6) %>%
  ggplot(aes(x = Country, y = n, fill = gene))+ 
    geom_bar(stat = "identity", position = "fill") + 
    geom_hline(yintercept = .5) +
    theme_bw() +
    labs(y = "Number of isolates", 
         title = "Mat genes in countries including more than 6 samples",
         fill = "Mat type", x = "") +
  scale_fill_manual(values = c("#CBF3F0", "#2EC4B6")) +
  coord_flip()



Whole chromosomes CNV

Question: How prelavent is aneuploidy in natural populations of Z.tritici? In the case of accessory chromosomes, is there a correlation between phylogeny, environment, host or time and the presence/absence of some chromosomes?

Methods: Based on the depth of coverage for all samples, we can identify for both core and accessory chromosomes whether each isolates includes 1, 0 or several copies.

core_depth = read_tsv(paste0(depth_per_window_dir, "Depth_per_sample_core_chr_summary.q30.txt")) %>%
  mutate(Median_core = Median)

chrom_depth = read_tsv(paste0(depth_per_window_dir, "Depth_per_chromosome_summary.q30.txt")) %>%
  left_join(., core_depth %>% 
  dplyr::select(Sample, Median_core)) %>%
  mutate(Relative_depth = Median/Median_core) %>%
  left_join(.,Zt_meta, by = c("Sample" = "ID_file")) %>%
  filter(CHROM != "mt") %>%
  mutate(Sample = fct_reorder(Sample, Sampling_Date)) %>%
  mutate(Sample = fct_reorder(Sample, Country)) %>%
  mutate(Sample = fct_reorder(Sample, Continent)) 

heatmap_depth = chrom_depth %>%
  filter(CHROM != "mt") %>%
  ggplot(aes(x = as.numeric(CHROM), y=Sample, fill=Relative_depth)) +
  geom_tile() + scale_fill_gradient2(low="white", high = "#2ec4b6") +
  geom_vline(xintercept = 13.5, linetype = "longdash", colour = "gray20") +
  theme(axis.text.y = element_blank(),
        axis.title.y = element_blank(), axis.ticks.y=element_blank()) +
  labs(fill = "Depth", x= "Chromosome") + xlim(c(0.5, 21.5))


plot_continent = chrom_depth %>%
  ggplot(aes(x = 1, y=Sample, fill=Continent)) +
  geom_tile(aes(width = 2))  +
  theme_classic() +
  theme(axis.text.y = element_blank(), axis.ticks.y=element_blank(),
        legend.position="left",
        axis.text.x=element_text(colour="white")) +
  labs(y= "Isolate") + Fill_Continent

plot_grid(plot_continent, heatmap_depth, rel_widths = c(2, 5))

In the heatmap, I represent the depth normalized by the median depth over all core chromosomes. As expected, the copy-number variation at the chromosome scale affects mostly the accessory chromosomes (AC). There is some presence of supernumerary AC and a lot of presence-absence variation. Supernumerary chromosomes can also be found in the core chromosomes but this is almost anecdotal as over the whole sampling this was found only in 9 cases.

Lthres = 0.50
Hthres = 1.50
depth = chrom_depth %>%
  dplyr::filter(!is.na(Relative_depth)) %>%
  dplyr::mutate(Depth_is = ifelse(Relative_depth > Hthres, "High", ifelse(Relative_depth < Lthres, "Low", "Normal"))) %>%
  mutate(CHROM = fct_reorder(CHROM, as.numeric(CHROM)))  

bar_Ndepth_per_CHR =ggplot(depth, aes(x = CHROM, fill = Depth_is)) +
  geom_bar(stat = "count") +
  scale_fill_manual(values =c( "#2ec4b6", "#cbf3f0","#EDE7E3")) +
    theme_light()+
    labs(x= "Chromosome", y = "Number of isolates")

#lollipop plots
##For high normalized depth values
temp = depth %>%
  filter(Depth_is == "High") %>%
  dplyr::group_by(CHROM) %>%
  dplyr::count() %>%
  mutate(CHROM = fct_reorder(CHROM, as.numeric(CHROM))) 
lolhigh =  ggplot(temp, aes(x = as.character(CHROM), y = n)) +
    geom_segment( aes(x=as.character(CHROM), xend=as.character(CHROM), y=0, yend=max(temp$n)),
                  color="grey80", size = 1) +
    geom_segment( aes(x=as.character(CHROM), xend=as.character(CHROM), y=0, yend=n),
                  color="grey20", size = 1) +
    geom_point( color="#2ec4b6", size=4)  +
    geom_text(aes( label = n,
                     y= n), stat= "identity",
              hjust = -0.5, vjust = -0.2) +
    theme_light() +
    theme(
    panel.grid.major.y = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
    ) +
    ylim(c(0,max(temp$n)+2+ max(temp$n)*0.1)) +
    labs(x= "Chromosome", y = "Number of isolates with supernumerary chromosome") +
    coord_flip()

##For low normalized depth values
temp = depth %>%
  filter(Depth_is == "Low") %>%
  dplyr::group_by(CHROM) %>%
  dplyr::count()%>%
  mutate(CHROM = fct_reorder(CHROM, as.numeric(CHROM)))

lollow = ggplot(temp, aes(x = CHROM, y = n)) +
    geom_segment( aes(x=CHROM, xend=CHROM, y=0, yend=max(temp$n)),
                  color="grey80", size = 1) +
    geom_segment( aes(x=CHROM, xend=CHROM, y=0, yend=n),
                  color="grey20", size = 1) +
    geom_point( color="#cbf3f0", size=4)  +
    geom_text(aes( label = n,
                     y= n), stat= "identity",
              hjust = -0.5, vjust = -0.2) +
    theme_light() +
    theme(
    panel.grid.major.y = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
    ) +
    ylim(c(0,max(temp$n)+ max(temp$n)*0.1)) +
    labs( x= "Chromosome", y = "Number of isolates without chromosome") +
    coord_flip()

bottom_row <- cowplot::plot_grid(lolhigh, lollow, labels = c('B', 'C'), label_size = 12)

plot_grid(bar_Ndepth_per_CHR, bottom_row, labels = c('A', ''), label_size = 12, ncol = 1)

Here is a table including the isolates with supernumerary core chromosomes.

depth  %>%
  dplyr::filter(Depth_is == "High") %>%
  dplyr::filter(as.numeric(CHROM) < 13) %>%
  dplyr::select(Sample, CHROM, Median, Median_core, Collection, Country)
## # A tibble: 18 × 6
##    Sample                  CHROM Median Median_core Collection           Country
##    <fct>                   <fct>  <dbl>       <dbl> <chr>                <chr>  
##  1 08STCZ011               12        47          24 Syngenta             Czech …
##  2 08STF035                5        154          79 Syngenta             France 
##  3 08STF036                12       159          82 Syngenta             France 
##  4 09STD041                4        157          86 Syngenta             Germany
##  5 ST00ARG_BD0069          5         17           9 <NA>                 <NA>   
##  6 ST13SP_Biog_SeptoDur104 4        100          49 BIOGER_Thierry       Spain  
##  7 ST16CH_2X10             1          2           1 <NA>                 <NA>   
##  8 ST16CH_2X10             2          2           1 <NA>                 <NA>   
##  9 ST16CH_2X10             3          2           1 <NA>                 <NA>   
## 10 ST16DK_Biog_DK15        5         16           8 <NA>                 <NA>   
## 11 ST90ORE_a12_3B_11       12        32          19 Hartmann_FstQst_2015 USA_Or…
## 12 STnnJGI_SRR4235099      12        28          17 JGI_2                USA_Or…
## 13 STnnJGI_SRR4235109      5         33          17 JGI_2                Switze…
## 14 STnnJGI_SRR5194526      12        78          40 JGI                  Sweden 
## 15 STnnJGI_SRR5194528      5         84          42 JGI                  Sweden 
## 16 STnnJGI_SRR5194605      5         73          37 JGI                  Hungary
## 17 STnnJGI_SRR5829692      4        165          84 JGI                  Kenya  
## 18 STnnJGI_SRR5829692      5        163          84 JGI                  Kenya

And an overlook of the accessory chromosomes PAV per continent (only considering continents with more than 10 isolates).

depth %>%
  dplyr::filter(as.numeric(CHROM) > 13) %>%
  filter(!is.na(Continent)) %>%
  dplyr::group_by(Continent, CHROM) %>%
  dplyr::mutate(Count_sample_per_continent = n()) %>%
  dplyr::filter(Count_sample_per_continent >= 10) %>%
  ggplot(aes(x = Continent, fill = Depth_is)) +
  geom_bar(position = "fill" ) + facet_wrap(CHROM~.) +
  theme_light() +
  scale_fill_manual(values = c("#2ec4b6", "#cbf3f0", "#EDE7E3")) +
    theme(axis.text.x = element_text(angle = 40, hjust = 1)) +
  ylab("Proportion of chromosomes") + xlab("")



Population structure


Question: Is the world-wide population of Z.tritici structured? If so, is it structured according to geography, host or time (or any other relevant info we hopefully have)?

Previous genomic work has shown very clear structure between populations of Z.tritici. However,the sampling was extremely heterogeneous. With a more geographically even sampling, do we also observe a clear-cut structure?
Methods: The methods I chose to use to investigate the structure were the following:

  • PCA
  • Structure-like analysis
  • Population tree

Structure-like clustering

The clustering here is done by using the snmf method from the LEA R package (http://membres-timc.imag.fr/Olivier.Francois/LEA/) on the same subset of SNPs as the PCA, but without any missing data. I ran the analysis for a K (number of cluster inferred) ranging from 1 to 15 and with 10 repeats for each K.

Clustering


vcftools --vcf ${VCFDIR}$VCFNAME.recode.vcf \
  --extract-FORMAT-info GT \
  --out ${POPSTR}$VCFNAME

cat  ${POPSTR}$VCFNAME.GT.FORMAT | cut -f 3- \
    > ${POPSTR}$VCFNAME.GT.FORMAT2
cat  ${POPSTR}$VCFNAME.GT.FORMAT | cut -f 1,2 \
    > ${POPSTR}$VCFNAME.GT.FORMAT.pos
head -n1 ${POPSTR}$VCFNAME.GT.FORMAT2 | gsed "s/\t/\n/g"  \
    > ${POPSTR}$VCFNAME.ind
gsed "s/\t//g"  ${POPSTR}$VCFNAME.GT.FORMAT2 | tail -n +2 \
    > ${POPSTR}$VCFNAME.geno
    
datamash transpose < ~/Documents/Postdoc_Bruce/Projects/WW_project/2_Population_structure/Ztritici_global_March2021.genotyped.ALL.filtered.clean.AB_filtered.variants.good_samples.biall_SNP.max-m-1.maf-0.05.thin-1000bp.even_sampling.GT.FORMAT2  |  sed 's/^/>/' | sed 's/\t/\n/' | sed 's/\t//g' | cut -c -150 > ~/Documents/Postdoc_Bruce/Projects/WW_project/2_Population_structure/Ztritici_global_March2021.genotyped.ALL.filtered.clean.AB_filtered.variants.good_samples.biall_SNP.max-m-1.maf-0.05.thin-1000bp.even_sampling.fasta
#project = snmf(paste0(PopStr_dir, vcf_name, ".geno"), K=1:15, entropy = TRUE,
#                        repetitions = 10, project = "new", ploidy = 1)
# Reading the results from the snmf runs
# ______________________________________
#Sample names
indv_snmf = read_tsv(paste0(PopStr_dir, vcf_name, ".ind"), col_names = F)
names(indv_snmf) = "Sample"

#Load project
project = load.snmfProject(paste0(PopStr_dir, vcf_name, ".snmfProject"))

K_list = c(1:15)

#Extracting the clustering info for the best run per K
datalist = list()
for (i in K_list){
  best = which.min(cross.entropy(project, K = i))
  temp = as.data.frame(Q(project, i, best))
  temp= cbind(indv_snmf, temp)

  temp = temp %>%
    gather("Cluster", "Admix_coef", -"Sample") %>%
    mutate(K=i)
   datalist[[i]] = as.tibble(temp)
}

snmf_results_per_K = bind_rows(datalist) %>%
  inner_join(., Zt_meta, by = c("Sample" = "ID_file")) %>%
  unite(Continent, Country, col = "for_display", remove = F)  


#sNMF pretty plots
# _______________
afiles = character(length(K_list))
for (i in K_list){
  best = which.min(cross.entropy(project, K = i))
  afiles[i] = Sys.glob(paste0(PopStr_dir, vcf_name, ".snmf/K",i, "/run", best, "/*Q"))
}

# create a qlist
qlist <- readQBasic(afiles)
al_qlist = alignK(qlist)

lab_set = inner_join(indv_snmf, Zt_meta, by = c("Sample" = "ID_file")) %>%
  dplyr::select(Continent, Country) %>%
  mutate(Continent = ifelse(is.na(Continent), "Unknown", Continent),
         Country = ifelse(is.na(Country), "Unknown", Country))

#Low numbers
from = 2
up_to = 6
p1 <-   plotQ(alignK(qlist[from:up_to], type = "across"),
            imgoutput="join",
            returnplot=T,exportplot=F,
            basesize=11,
            splab= paste0("K=", K_list[from:up_to]),
            grplab=lab_set, ordergrp=T, grplabangle = 40, grplabheight = 2, grplabsize = 2,
            clustercol = K_colors)

grid.arrange(p1$plot[[1]])

#Medium numbers
from = 7
up_to = 11
p2 <-   plotQ(alignK(qlist[from:up_to], type = "across"),
            imgoutput="join",
            returnplot=T,exportplot=F,
            basesize=11,
            splab= paste0("K=", K_list[from:up_to]),
            grplab=lab_set, ordergrp=T, grplabangle = 40, grplabheight = 2, grplabsize = 2,
            clustercol = K_colors)
grid.arrange(p2$plot[[1]])

#High numbers
from = 12
up_to = 15
p3 <-   plotQ(alignK(qlist[from:up_to], type = "across"),
            imgoutput="join",
            returnplot=T,exportplot=F,
            basesize=11,
            splab= paste0("K=", K_list[from:up_to]),
            grplab=lab_set, ordergrp=T, grplabangle = 40, grplabheight = 2, grplabsize = 2,
            clustercol = K_colors)
grid.arrange(p3$plot[[1]])

I need to identify the isolates which belong to each of the clusters. For this, we need to set a threshold, since there are very few (or even no) isolates assigned fully to one only. I test two thresholds: isolates assigned to one cluster with a value higher than 0.75 and 0.9.

#Setting thresholds to compare
chosen_threshold = 0.75
chosen_threshold2 = 0.9

pure_by_threshold = bind_rows(snmf_results_per_K %>%
    filter(Admix_coef > chosen_threshold) %>% 
    mutate(Threshold = paste0("> ", chosen_threshold)), 
  snmf_results_per_K %>%
    filter(Admix_coef > chosen_threshold2) %>%
    mutate(Threshold = paste0("> ", chosen_threshold2)))


# Number of pure isolates per K
pure_by_threshold %>%
    dplyr::group_by(K, Threshold) %>%
    dplyr::count() %>%
    ggplot(aes(x = as.factor(K), y = n, fill = Threshold)) +
       geom_bar(stat = "identity", position = "dodge") +
       theme_bw() + scale_fill_manual(values = c("#2ec4b6", "#cbf3f0"))+ 
    labs(x = "K", y = "Number of isolates", title = "Pure isolates per K") 

# Number of pure isolates per K per country
temp2 = pure_by_threshold %>%
  dplyr::group_by(Continent, for_display, Cluster, K, Threshold) %>%
  dplyr::count() 

temp2 %>%
  filter(K > 9 & K < 16) %>%
  ggplot(aes(x = Cluster, y = for_display,
             size = n, color = Continent)) +
  geom_point(alpha = 0.3) + Color_Continent+ theme_bw() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  facet_grid(rows = vars(Threshold), cols = vars(K), scales = "free_x") + 
    labs(x = "", y = "", size = "Nb of isolates",
         title = "Pure isolates per country and per K") 

Choice of K

Choosing the number K of clusters that best represent the data at hand is always a challenge. First, let’s look at the cross-validation results. sNMF estimates an entropy criterion which evaluates the quality of fit of the model to the data, potentially helping to find the best number of ancestral populations.

#Dot plot of the cross-entropy 
#plot(project, col = "goldenrod", pch = 19, cex = 1.2) #native method
cross_ent = list()
for (i in K_list){
  cross_ent[[i]] = as_tibble(cross.entropy(project, K = i), rownames = "NA") %>% 
    mutate( K = i)
  colnames(cross_ent[[i]] ) <- c("Run", "Crossentropy", "K")
}
cross_ent = bind_rows(cross_ent)
temp = cross_ent %>% group_by(K) %>% dplyr::summarize(average = mean(Crossentropy), minimum = min(Crossentropy))

p1 = ggplot() +
  geom_point(data = cross_ent, aes(x = as.factor(K), y = Crossentropy), alpha = 0.4, size = 2, col = "#cbf3f0") +
  geom_point(data = temp, aes(x = as.factor(K), y = minimum), alpha = 0.4, size = 2, col = "#2ec4b6") +
  theme_cowplot() +
    labs(x = "K", y = "Cross-entropy")

p2 = pure_by_threshold %>%
  filter(Threshold == paste0("> ", chosen_threshold)) %>%
  filter(K > 1) %>%
  dplyr::group_by(Cluster, K) %>%
  dplyr::count()%>%
  group_by(K) %>%
  dplyr::summarize(Size_smallest_cluster = min(n)) %>%
  ggplot(aes(x = as.factor(K), label = Size_smallest_cluster)) +
    geom_bar(aes(y = Size_smallest_cluster, fill = K), stat = "identity") + 
    theme_cowplot() +
    geom_label(aes(y = Size_smallest_cluster + 7)) +
    scale_fill_continuous(high = "#2ec4b6", low = "#cbf3f0") +
    labs(x = "K", y = "Size of the smallest cluster")

p3 = pure_by_threshold %>%
  filter(Threshold == paste0("> ", chosen_threshold)) %>%
    dplyr::group_by(K) %>%
    dplyr::count() %>%
    ggplot(aes(x = as.factor(K), y = n, fill = K)) +
       geom_bar(stat = "identity", position = "dodge") +
       theme_bw() +
       scale_fill_continuous(high = "#2ec4b6", low = "#cbf3f0") +
    labs(x = "K", y = "Number of assigned samples")

top_row = cowplot::plot_grid(p1, p3 + theme(legend.position = "none"), ncol = 2)
cowplot::plot_grid(top_row, p2 + theme(legend.position = "none"), ncol = 1)

In the case of our analysis, we do not have a very clear-cut minimum value for the cross-entropy criterion value. There is however a plateau starting from K=10. I also look at the number of samples assigned to one cluster or another based on the threshold set previously, as well as at the smallest cluster size. If the cluster size is very small, I would not be able to get meaningful information about them. However, this might be a hint that very real clusters exist in areas where our sampling is too sparse. This is an area to explore further in the future with more comprehensive sampling across the globe.

Based on the information contained in the plots above, I chose to proceed with 11 genetic clusters. I will write different tables to keep the information related to this clustering. For many analyses, the number of samples has an effect of the value computed, so I also create subsets of samples based on the number of isolates from the smallest clusters. For each cluster, I randomly draw 10 times the isolates.

#These values are chosen based on the plot above
chosen_K = 11

write_tsv(snmf_results_per_K %>% filter(K == chosen_K), 
          file = paste0(PopStr_dir, vcf_name, ".snmf_results_chosen_K.tab"))

#Looking at individuals with admixture coef higher than the threshold defined above.
high_anc_coef_snmf = snmf_results_per_K %>%
  filter(K == chosen_K) %>%
  filter(Admix_coef > chosen_threshold)


##Writing out tables for later
high_anc_coef_snmf %>% dplyr::select(Sample) %>%
  write_tsv(., file = paste0(PopStr_dir, vcf_name, ".high_anc_coef_snmf.ind"),
            col_names = F)
high_anc_coef_snmf %>%
  write_tsv(., file = paste0(PopStr_dir, vcf_name, ".high_anc_coef_snmf.tsv"),
            col_names = T)



# Making lists of samples per cluster for use later
max_continent = dplyr::count(high_anc_coef_snmf, Cluster, Continent) %>%
  group_by(Cluster) %>%
  mutate(somme = sum(n), prop = n/somme) %>%
  filter(prop > 0.5) %>%
  dplyr::select(Cluster, Continent)

clusters = dplyr::count(high_anc_coef_snmf, Cluster, Continent)


## Files with all pure isolates per cluster
for (cluster in max_continent %>% pull(Cluster)) {
  file_name = paste0(PopStr_dir, "Sample_list_", cluster, ".args")
  high_anc_coef_snmf %>% filter(Cluster == cluster) %>%
    dplyr::select(Sample) %>%
    write_tsv(file = file_name, col_names = F)
}


## Files with a similar number pure isolates between cluster

minimum_cluster_size = min(dplyr::count(high_anc_coef_snmf, Cluster) %>% dplyr::select(n))

for (cluster in max_continent %>% pull(Cluster)) {
  for (i in 1:10 ){
    file_name = paste0(PopStr_dir, "Sample_list_", cluster, "_", i, ".args")
    temp = high_anc_coef_snmf %>% 
      filter(Cluster == cluster) %>%
      sample_n(size = minimum_cluster_size) %>%
      dplyr::select(Sample)
    write_tsv(temp, file = file_name, col_names = F) 
  }
}

Phylogeography

Once a adequate number of cluster is selected, I investigate their geographical distribution.

#Table of pure samples numbers per continent
kable(snmf_results_per_K %>%
  filter(K == chosen_K) %>%
  filter(Admix_coef > chosen_threshold) %>%
  dplyr::group_by(Continent, Cluster) %>%
  dplyr::count() %>%
  pivot_wider(names_from = Continent, values_from =n, values_fill = 0))
Cluster Africa Europe Middle East North America Oceania South America NA
V11 21 8 2 0 0 0 0
V2 4 553 0 0 0 0 2
V6 0 0 40 0 0 0 0
V7 0 0 16 0 0 0 0
V10 0 0 0 178 0 0 1
V4 0 0 0 1 36 0 0
V8 0 0 0 20 0 0 0
V1 0 0 0 0 43 0 0
V3 0 0 0 0 31 0 0
V5 0 0 0 0 0 31 0
V9 0 0 0 0 0 16 0
##Bubble plot
p_cluster = high_anc_coef_snmf %>%
  dplyr::group_by(Continent, for_display, Cluster) %>%
  dplyr::count() %>%
  ggplot(aes(x = for_display, y = Cluster,
             size = n, color = Continent)) +
  geom_point(alpha = 0.5) + Color_Continent+ theme_bw() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
    labs(x = "", 
         title = "Pure isolates per country for the chosen K value",
         subtitle = str_wrap(paste0("Number of genotypes with admix coef > ", chosen_threshold, 
                                    " assigned to ", chosen_K, " clusters."),
                             width = 70),
         size = "Nb of isolates") 
p_cluster

The clustering data can also be represented as an average of ancestry coefficient per country. This is done here first with barplots.

#Setting data with the chosen number of K as well as the chosen coefficient threshold
chosen_coef = snmf_results_per_K %>% 
  filter(K == chosen_K) %>%
  filter(Country != "NA") %>%
  filter(Country != "USA_NA") %>%
  dplyr::select(Sample, Continent, Country, Admix_coef, Cluster, Latitude, Longitude) 


K_colors = c("#8ECAE6", #V1 Aus (TAS)
             "#49810E", #V10 USA
             "#E16684", #V11 North Africa
             "#FFBA0A", #V2 Europe
             "#126782", #V3 NZ
             "#219EBC", #V4 Australia (NSW)
             "#8FA253", #V5 Uruguay + Argentina
             "#650104", #V6 Israel + Turkey
             "#DE020A", #V7 Iran
             "#2A4908", #V8 Canada
             "#B3C186") #V9 Boliva + Chile + Ecuador


#Identifying the main cluster per country
cluster_per_country = high_anc_coef_snmf %>%
  group_by(Country, Cluster) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::mutate(freq = n / sum(n)) %>%
  filter(freq > 0.5) %>%
  dplyr::select(Country, Main_country_cluster = Cluster)

# Cluster composition per country for all continents
temp = chosen_coef %>%
  group_by(Continent, Country, Cluster) %>%
  dplyr::summarize(average_coef = mean(Admix_coef),
                   Nb_per_country = n()) %>%
  dplyr::mutate(Cluster2 = ifelse(average_coef < 0.1, "Other", Cluster))  %>%
  filter(Nb_per_country >= 6) 

#Bar plot per country
temp %>% filter(Continent != "NA") %>%
  filter(Continent != "Asia") %>%
  ggplot(aes(x = Country, y = average_coef, fill = Cluster2))  + 
    geom_bar(stat = "identity") +
    scale_fill_manual(values = c("grey", K_colors) ) + 
    theme_cowplot() +
     labs(subtitle = "All continents, limited to countries with 6 or more isolates",
         title = "Cluster ancestry per country",
         y = "Average of the ancestry coefficient", fill = "Cluster") +
    facet_grid(cols = vars(Continent), switch = "x", scales = "free_x", space = "free_x") +
    theme(axis.title.x = element_blank(), 
          axis.title.y = element_text(size = 12),
          axis.text.x = element_text(angle = 50, hjust = 1, vjust = 1, size = 8),
          axis.text.y = element_text(hjust = 1, vjust = 1, size = 10),
          panel.spacing = unit(0, "lines"), 
          strip.background = element_blank(),
          strip.placement = "bottom",
          strip.text = element_text(size = 10),
          legend.position = "bottom")

Such visualization is useful but not as intuitive as a map! Let’s use the same summarising method but with a more local viewpoint (I use rounded coordinates instead of country).

#Summarizing based on samples found in neighbouring areas
temp = chosen_coef  %>%
  mutate(Latitude = round(Latitude/2)*2, Longitude = round(Longitude/2)*2)%>%
  group_by(Continent, Country, Cluster, Longitude, Latitude) %>%
  summarize(average_coef = mean(Admix_coef), number_of_isolates = n()) %>%
  filter(!is.na(Latitude))

write_tsv(temp, file = paste0(PopStr_dir, vcf_name, ".chosen_coef_pies.tab"))

#Transforming to fit the scatter pie requirements
pies  = temp %>% #filter(number_of_isolates > 2) %>%
  mutate(Cluster2 = ifelse(average_coef < 0.1, "Other", Cluster)) %>%
  group_by(Continent, Longitude, Latitude, Country, Cluster2, number_of_isolates) %>%
  dplyr::summarize(to_plot = sum(average_coef)) %>%
  arrange(Cluster2) %>%
  pivot_wider(names_from = Cluster2, values_from = to_plot, values_fill = 0) %>%
  mutate(radius = ifelse(number_of_isolates > 10, 1.5, log(number_of_isolates))) %>%
  dplyr::select(-number_of_isolates)  




# Simple map of the world

map1 = ggplot() + 
  theme_void() +
  geom_polygon(data = map_data("world"), aes(x=long, y = lat, group = group), fill="#ede7e3")  +
  scale_size("Number of genomes", limits = c(1, max_circle))  +
  theme(legend.position = "None", 
        panel.border  = element_rect(fill=NA, colour = "grey",size = 0.5, linetype = 1)) +
    scale_fill_manual(values = c(c("grey"), K_colors))

#Splitting the map into our 3 main focus areas
p1 = map1 + coord_cartesian(xlim=c(-20, 60), ylim=c(20, 70)) + 
  geom_scatterpie(data = pies, mapping = aes(x=Longitude, y=Latitude, group=Country, r = radius), 
                  cols = c("Other", "V1", "V10", "V11", "V2", "V3", "V4", 
                           "V5", "V6", "V7", "V8", "V9"), color=NA, alpha=.8)
p2 = map1 + coord_cartesian(xlim=c(-160, -40), ylim=c(-80, 80)) + 
  geom_scatterpie(data = pies, mapping = aes(x=Longitude, y=Latitude, group=Country, r = radius*2), 
                   cols = c("Other", "V1", "V10", "V11", "V2", "V3", "V4", 
                           "V5", "V6", "V7", "V8", "V9"), color=NA, alpha=.8)
p3 = map1 + coord_cartesian(xlim=c(115, 185), ylim=c(-65, 10)) + 
  geom_scatterpie(data = pies, mapping = aes(x=Longitude, y=Latitude, group=Country, r = radius*2), 
                   cols = c("Other", "V1", "V10", "V11", "V2", "V3", "V4", 
                           "V5", "V6", "V7", "V8", "V9"), color=NA, alpha=.8)

#Plotting all the maps together! 
aus_map = cowplot::plot_grid(p3, get_legend(p1), ncol = 2, rel_widths = c(1, 0.75))
ligne = cowplot::plot_grid(p1, aus_map, ncol = 1,  rel_heights = c(1, 0.7))
cowplot::plot_grid(p2, ligne, ncol = 2, rel_widths = c(0.8, 1)) 

ggsave(paste0(fig_dir, "Str_scatter_pie.pdf"), width = 18, height = 10, units = "cm")


#p2 + theme(
#    legend.position = c(.05, .05),
#    legend.justification = c("left", "bottom"),
#    legend.box.just = "left",
#    legend.margin = margin(1,1,1,1)
#    )+scale_color_discrete(guide="none") 

Inter-cluster hybridization

For each country, we define the local cluster with “votes”: the cluster to which more than half of the non-admixed isolates are originating is considered to be the local cluster. We can quantify the isolates which are non-admixed (or “pure”) isolates from the local cluster, non-admixed from another cluster, or hybrid.

#For each sample, identify if is hybrid, local pure or pure from somewhere else.
status_admix = left_join(chosen_coef, cluster_per_country) %>% 
  mutate(Main_country_cluster = ifelse(!is.na(Main_country_cluster), Main_country_cluster, "No_main")) %>%
  group_by(Sample, Continent, Country) %>%
  dplyr::mutate(max_admix = max(Admix_coef),
         max_cluster = ifelse(Admix_coef == max_admix, Cluster, "")) %>%
  filter(max_cluster != "") %>%
  dplyr::mutate(Status = ifelse(max_admix < chosen_threshold, "Hybrid",
                         ifelse(max_cluster == Main_country_cluster, "Pure local", 
                                "Pure other")))

# Making a pie chart summary 
# Count isolate type and compute the position of labels
data = ungroup(status_admix) %>% 
  dplyr::count(Status, name = "Nb_sample") %>% 
  dplyr::mutate(Total = sum(Nb_sample)) %>%
  arrange(desc(Status)) %>%
  mutate(prop = Nb_sample / Total *100) %>%
  mutate(ypos = cumsum(prop)- 0.5*prop )

# Basic piechart
ggplot(data, aes(x="", y=prop, fill=Status)) +
  geom_bar(stat="identity", width=1, color="white") +
  coord_polar("y", start=0) +
  theme_void() + 
  theme(legend.position="right") +
  geom_text(aes(y = ypos, label = Nb_sample), color = "black", size=6) +
  scale_fill_manual(values = c("#ff9f1c", "#CBF3F0", "#2EC4B6"))

I also want an idea of where the hybrids are located.

# Summary of hybrid category per country
temp = status_admix %>% 
  group_by(Continent, Country, Main_country_cluster) %>% 
  dplyr::count(Status)

## As table
kable(pivot_wider(temp, names_from = Status, values_from = n, values_fill = 0))
Continent Country Main_country_cluster Hybrid Pure local Pure other
Africa Algeria V11 3 5 0
Africa Ethiopia No_main 4 0 0
Africa Kenya V2 4 2 0
Africa Morocco V11 0 2 0
Africa Tunisia V11 1 14 2
Asia Kazakhstan No_main 1 0 0
Europe Belarus No_main 1 0 0
Europe Belgium V2 0 14 0
Europe Czech Republic V2 2 18 0
Europe Denmark V2 0 9 0
Europe France V2 6 228 4
Europe Germany V2 1 63 0
Europe Hungary No_main 2 0 0
Europe Ireland V2 0 36 0
Europe Italy V11 1 2 0
Europe Latvia V2 0 2 0
Europe Netherlands V2 2 6 0
Europe Poland V2 0 5 0
Europe Portugal No_main 6 0 0
Europe Romania No_main 1 0 0
Europe Russia No_main 2 0 0
Europe Spain V11 4 2 0
Europe Sweden V2 0 6 0
Europe Switzerland V2 2 135 0
Europe UK V2 0 31 0
Europe Ukraine No_main 2 0 0
Middle East Iran V7 6 16 0
Middle East Israel V6 0 39 0
Middle East Syria V11 6 1 0
Middle East Turkey V11 10 1 0
Middle East Yemen V6 0 1 0
North America Canada V8 0 11 0
North America Mexico No_main 3 0 0
North America USA_California V10 0 7 0
North America USA_Indiana V10 4 7 0
North America USA_Minnesota V8 0 2 0
North America USA_Missouri V10 3 7 0
North America USA_North Dakota V8 0 7 0
North America USA_Ohio V10 0 3 0
North America USA_Oregon V10 0 145 0
North America USA_Texas V10 1 7 0
Oceania Australia_NA V4 1 9 3
Oceania Australia_NSW V4 0 27 0
Oceania Australia_Tasmania V1 0 40 0
Oceania New Zealand V3 24 31 0
South America Argentina V5 0 16 0
South America Bolivia V9 0 1 0
South America Chile V9 0 14 0
South America Ecuador V9 1 1 0
South America Peru No_main 1 0 0
South America Uruguay V5 1 15 0
## As a figure
ungroup(temp) %>%
  group_by(Country) %>%
  dplyr::mutate(Nb_per_country = sum(n)) %>%
  filter(Nb_per_country >= 4) %>%
  ggplot(aes(x = Country, y = n, fill = Status)) +
    geom_bar(stat = "identity", position = "fill") + 
    theme_cowplot() +
    scale_fill_manual(values =c("#2ec4b6", "#EDE7E3", "#cbf3f0")) +
    labs(y = "Proportion of isolates")+
    facet_grid(rows = vars(Continent), switch = "y", scales = "free_y", space = "free_y") +
    theme(axis.title.y = element_blank(), 
          axis.title.x = element_text(size = 14),
          axis.text.x = element_text(size =10),
          axis.text.y = element_text(size = 8),
          panel.spacing = unit(0, "lines"), 
          strip.background = element_blank(),
          strip.placement = "bottom",
          strip.text = element_text(size = 10),
          legend.position = "top") +
  coord_flip()

    #facet_wrap(vars(Continent), scales = "free")


#Table hybrids assigned to several clusters, and pure foreign
temp = inner_join(chosen_coef, 
                  status_admix %>% 
                    filter(Status != "Pure local") %>% 
                    dplyr::select(Sample, Status)) %>%
  dplyr::select(Sample, Continent, Country, Latitude, Longitude, Status, Cluster, Admix_coef) %>%
  mutate(Admix_coef = ifelse(Admix_coef > 0.2, round(Admix_coef, 4), NA)) %>%
  pivot_wider(names_from = Cluster, values_from = Admix_coef) %>%
  rowwise() %>%
  dplyr::mutate(Count_NA = sum(is.na(c_across(V1:V11)))) %>%
  filter(Status == "Pure other" | Count_NA < 10)

opts <- options(knitr.kable.NA = "")
kable(temp, digits = 3, )
Sample Continent Country Latitude Longitude Status V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 Count_NA
Ethiopia_1992_ETD1 Africa Ethiopia 8.730 39.010 Hybrid 0.281 0.567 9
Indiana_1993_I25a_1 North America USA_Indiana 40.420 -86.920 Hybrid 0.224 0.706 9
Missouri_1994_ST94MO15a_1 North America USA_Missouri 38.890 -92.210 Hybrid 0.221 0.698 9
ST08TN_26_5_4 Africa Tunisia 36.810 10.179 Pure other 0.805 10
ST13FR_Biog_SeptoDur10 Europe France 44.218 0.448 Hybrid 0.368 0.608 9
ST13FR_Biog_SeptoDur12 Europe France 44.218 0.448 Hybrid 0.449 0.548 9
ST13FR_Biog_SeptoDur29 Europe France 44.308 4.346 Pure other 0.765 10
ST13FR_Biog_SeptoDur8 Europe France 44.218 0.448 Hybrid 0.389 0.540 9
ST13FR_Biog_SeptoDur82 Europe France 44.308 4.346 Pure other 0.834 10
ST13FR_Biog_SeptoDur88 Europe France 44.308 4.346 Pure other 0.880 10
ST13IT_Biog_1819 Europe Italy 41.891 12.492 Hybrid 0.542 0.245 9
ST13NZ_St13_5_4 Oceania New Zealand -41.211 174.777 Hybrid 0.246 0.488 9
ST15NZ_St_15640_1 Oceania New Zealand -40.134 175.658 Hybrid 0.270 0.562 9
ST15NZ_St_15640_2 Oceania New Zealand -40.134 175.658 Hybrid 0.263 0.583 9
ST15NZ_St_15640_7 Oceania New Zealand -40.134 175.658 Hybrid 0.300 0.525 9
ST15NZ_St_15640_8 Oceania New Zealand -40.134 175.658 Hybrid 0.257 0.570 9
ST15NZ_St_15642_11 Oceania New Zealand -46.219 169.492 Hybrid 0.562 0.204 9
ST15NZ_St_15642_12 Oceania New Zealand -46.219 169.492 Hybrid 0.213 0.580 9
ST15NZ_St_15642_13 Oceania New Zealand -46.219 169.492 Hybrid 0.220 0.593 9
ST16KA_Biog_K32 Asia Kazakhstan 54.877 69.127 Hybrid 0.305 0.253 9
ST16RU_Biog_R1 Europe Russia 55.745 37.626 Hybrid 0.387 0.249 9
ST16RU_Biog_R19 Europe Russia 55.745 37.626 Hybrid 0.331 0.241 9
ST_SRR2834990 Oceania Australia_NA -35.296 149.122 Pure other 0.967 10
ST_SRR2835000 Oceania Australia_NA -35.296 149.122 Pure other 0.978 10
ST_SRR2835057 Oceania Australia_NA -35.296 149.122 Pure other 0.915 10
STnnJGI_SRR3452689 Middle East Turkey 39.931 32.833 Hybrid 0.252 0.622 9
STnnJGI_SRR3452696 North America Mexico 19.440 -99.134 Hybrid 0.630 0.237 9
STnnJGI_SRR3452697 South America Peru -12.022 -77.041 Hybrid 0.318 0.681 9
STnnJGI_SRR3452698 Europe Portugal 38.716 -9.127 Hybrid 0.280 0.254 0.216 8
STnnJGI_SRR3452699 Europe Portugal 38.716 -9.127 Hybrid 0.310 0.205 0.253 8
STnnJGI_SRR3452700 Africa Algeria 36.818 3.048 Hybrid 0.342 0.304 9
STnnJGI_SRR3452702 Middle East Syria 33.517 36.316 Hybrid 0.259 0.564 9
STnnJGI_SRR3452704 Africa Algeria 36.818 3.048 Hybrid 0.338 0.302 9
STnnJGI_SRR3452713 Europe France 48.853 2.347 Pure other 0.885 10
STnnJGI_SRR3452753 Europe Hungary 47.504 19.050 Hybrid 0.286 0.315 9
STnnJGI_SRR3452770 Middle East Syria 33.517 36.316 Hybrid 0.319 0.635 9
STnnJGI_SRR3452776 Africa Tunisia 36.810 10.179 Pure other 0.844 10
STnnJGI_SRR3452779 Europe Portugal 38.716 -9.127 Hybrid 0.332 0.312 9
STnnJGI_SRR4235082 Europe Portugal 38.716 -9.127 Hybrid 0.282 0.265 9
STnnJGI_SRR4235083 Africa Algeria 36.818 3.048 Hybrid 0.352 0.236 9
STnnJGI_SRR4235084 Oceania New Zealand -41.211 174.777 Hybrid 0.405 0.292 0.249 8
STnnJGI_SRR4235085 Africa Tunisia 36.810 10.179 Hybrid 0.401 0.236 9
STnnJGI_SRR4235086 North America Mexico 19.440 -99.134 Hybrid 0.660 0.215 9
STnnJGI_SRR4235089 Oceania New Zealand -41.211 174.777 Hybrid 0.406 0.234 0.265 8
STnnJGI_SRR4235092 Europe Portugal 38.716 -9.127 Hybrid 0.325 0.306 0.224 8
STnnJGI_SRR4235093 Europe Portugal 38.716 -9.127 Hybrid 0.227 0.224 0.232 8
STnnJGI_SRR4235113 Europe Romania 44.428 26.084 Hybrid 0.222 0.452 9
STnnJGI_SRR5194479 Europe Spain 40.416 -3.708 Hybrid 0.322 0.218 0.216 8
STnnJGI_SRR5194515 Europe Spain 40.416 -3.708 Hybrid 0.265 0.201 0.246 8
STnnJGI_SRR5194596 Middle East Iran 35.727 51.385 Hybrid 0.222 0.384 9
STnnJGI_SRR5194605 Europe Hungary 47.504 19.050 Hybrid 0.272 0.406 9
STnnJGI_SRR5194607 Middle East Syria 33.500 35.800 Hybrid 0.277 0.619 9
STnnJGI_SRR5829674 Oceania New Zealand -41.211 174.777 Hybrid 0.440 0.249 0.264 8
Syria_1992_SYK2 Middle East Syria 33.517 36.316 Hybrid 0.285 0.583 9
Syria_1992_SYT3 Middle East Syria 36.010 36.940 Hybrid 0.239 0.596 9
Ukraine_1995_ST95UR_BIc_1 Europe Ukraine 46.430 30.690 Hybrid 0.308 0.304 9
Ukraine_1995_ST95UR_F1a_3 Europe Ukraine 46.430 30.690 Hybrid 0.314 0.278 9
write_tsv(x = temp, file = paste0(PopStr_dir, "Hybrids_and_pure_foreign.tab"))



# Simple map of the world
map1 = ggplot() + 
  theme_void() +
  geom_polygon(data = map_data("world"), aes(x=long, y = lat, group = group), fill="#ede7e3")  +
  theme(panel.border  = element_rect(fill=NA, colour = "grey",size = 0.5, linetype = 1))

#Splitting the map into our 3 main focus areas
p1 = map1 + coord_cartesian(xlim=c(-20, 60), ylim=c(20, 70)) + 
  geom_jitter(data = temp, aes(Longitude, Latitude, col = Status), width = 1, height = 1) +
  scale_color_manual(values = c("#ff9f1c", "#2EC4B6"))
p2 = map1 + coord_cartesian(xlim=c(-160, -40), ylim=c(-80, 80)) + 
  geom_jitter(data = temp, aes(Longitude, Latitude, col = Status), width = 1, height = 1) +
  scale_color_manual(values = c("#ff9f1c", "#2EC4B6"))
p3 = map1 + coord_cartesian(xlim=c(115, 175), ylim=c(-65, 10)) + 
  geom_jitter(data = temp, aes(Longitude, Latitude, col = Status), width = 2, height = 2) +
  scale_color_manual(values = c("#ff9f1c", "#2EC4B6"))

#Plotting all the maps together! 
aus_map = cowplot::plot_grid(p3 + theme(legend.position = "none"), get_legend(p1), ncol = 2, rel_widths = c(1, 0.9))
ligne = cowplot::plot_grid(p1 + theme(legend.position = "none"), aus_map, ncol = 1,  rel_heights = c(1, 0.7))
cowplot::plot_grid(p2 + theme(legend.position = "none"), ligne, ncol = 2, rel_widths = c(0.7, 1)) 

ggsave(paste0(fig_dir, "Hybrids_scatter_pie.pdf"), width = 18, height = 10, units = "cm")

Principal Component Analysis

As a second method to investigate the population structure of Z.tritici at the world-wide scale, I chose to do a principal component analysis based on a subset of the SNPs. The results from the PCA and from the clutering analysis are coherent with each other: Oceania separates into 3 clusters (one in New_Zealand, and two in Australia) and the North American isolates form two separate clusters. Higher K values also distinguish a Middle-Eastern/African cluster from the European cluster, representing the two extreme points of the gradient found between these populations in the PCA. Despite the high number of isolates from Europe, it’s interesting to see that no clustering appears there.

snpgdsVCF2GDS(paste0(vcf_dir, vcf_name, ".recode.vcf"),
              paste0(PopStr_dir, vcf_name, ".recode.gds"), method="biallelic.only")
genofile <- snpgdsOpen(paste0(PopStr_dir, vcf_name, ".recode.gds"))
pca <-snpgdsPCA(genofile)
snpgdsClose(genofile)

pca2 = as_tibble(pca$eigenvect) %>% dplyr::select(V1:V8)
colnames(pca2) = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8")
pca2 = pca2 %>%
  dplyr::mutate(sample_id = pca$sample.id ) %>%
  dplyr::right_join(., status_admix, by = c("sample_id" = "Sample")) %>%
  filter(!is.na(PC1))

#Writing table to store PCA results
pca2 %>%
dplyr::select(ID_file = sample_id, PC1, PC2, PC3, PC4, PC5, PC6, PC7, PC8, 
              Continent, Country, Latitude, Longitude) %>%
write_tsv(., file = paste0(PopStr_dir, vcf_name, ".PCA_results.tab"),
            col_names = T)

#
as.tibble(pca$eigenval[!is.na(pca$eigenval)]) %>%
  ggplot(aes(x = c(1:length(pca$eigenval[!is.na(pca$eigenval)])),
             y =value)) + 
  geom_point() +
  theme_bw() + 
  labs(y = "Eigenvalue", x = "Principal component")

eigen_sum = sum(pca$eigenval[!is.na(pca$eigenval)])
for_plot = pca2 %>% mutate(Cluster = ifelse(Status == "Hybrid", Status, Cluster))

#Defining colors per cluster based on cluster
myColors2 <- c("grey", "#129eba", "#3F6E0C", "#DA4167", "#ffba0a", "#129eba", "#129eba", 
               "#8fa253", "#A20106", "#A20106", "#3F6E0C", "#8fa253")
myShapes <- c(1, 1, 1, 1, 1, 2, 0, 
               1, 1, 0, 0, 0)

names(myColors2) = levels(factor(for_plot$Cluster))
names(myShapes) = levels(factor(for_plot$Cluster))
Color_Cluster = ggplot2::scale_colour_manual(name = "Cluster", values = myColors2)
Fill_Cluster = ggplot2::scale_fill_manual(name = "Cluster", values = myColors2)
Shape_Cluster = ggplot2::scale_shape_manual(name = "Cluster", values = myShapes)

## Detailed color scheme 
K_colors = c("grey", 
             "#8ECAE6", #V1 Aus (TAS)
             "#49810E", #V10 USA
             "#E16684", #V11 North Africa
             "#FFBA0A", #V2 Europe
             "#126782", #V3 NZ
             "#219EBC", #V4 Australia (NSW)
             "#8FA253", #V5 Uruguay + Argentina
             "#650104", #V6 Israel + Turkey
             "#DE020A", #V7 Iran
             "#2A4908", #V8 Canada
             "#B3C186") #V9 Boliva + Chile + Ecuador
names(K_colors) = levels(factor(for_plot$Cluster))
Color_Cluster2 = ggplot2::scale_colour_manual(name = "Cluster", values = K_colors)
Fill_Cluster2 = ggplot2::scale_fill_manual(name = "Cluster", values = K_colors)


# Dot plots
p1 = ggplot(for_plot, aes(x = PC1, y= PC2, shape = Cluster)) +
  geom_point(aes(color = Cluster)) +
  labs(x = paste0("PC 1 (", round(pca$eigenval[1]*100/eigen_sum, 2), "%)"),
       y = paste0("PC 2 (", round(pca$eigenval[2]*100/eigen_sum, 2), "%)")) +
  Color_Cluster + Shape_Cluster + 
  theme_bw()

p2 = ggplot(for_plot, aes(x = PC3, y= PC4, shape = Cluster)) +
  geom_point(aes(color = Cluster)) +
  labs(x = paste0("PC 3 (", round(pca$eigenval[3]*100/eigen_sum, 2), "%)"),
       y = paste0("PC 4 (", round(pca$eigenval[4]*100/eigen_sum, 2), "%)")) +
  Color_Cluster + Shape_Cluster +
  theme_bw()

p3 = ggplot(for_plot, aes(x = PC5, y= PC6, shape = Cluster)) +
  geom_point(aes(color = Cluster)) +
  labs(x = paste0("PC 5 (", round(pca$eigenval[5]*100/eigen_sum, 2), "%)"),
       y = paste0("PC 6 (", round(pca$eigenval[6]*100/eigen_sum, 2), "%)")) +
  Color_Cluster + Shape_Cluster +
  theme_bw()

p4 = ggplot(for_plot, aes(x = PC7, y= PC8, shape = Cluster)) +
  geom_point(aes(color = Cluster)) +
  labs(x = paste0("PC 7 (", round(pca$eigenval[7]*100/eigen_sum, 2), "%)"),
       y = paste0("PC 8 (", round(pca$eigenval[8]*100/eigen_sum, 2), "%)")) +
  Color_Cluster + Shape_Cluster +
  theme_bw()

PCA_grid  = cowplot::plot_grid(p1 + theme(legend.position = "none"), p2 + theme(legend.position = "none"),
                               p3 + theme(legend.position = "none"), p4 + theme(legend.position = "none"))
cowplot::plot_grid(PCA_grid, get_legend(p1 + theme(legend.position = "bottom")), ncol =1, rel_heights = c(1, 0.2))

p11 = filter(for_plot, Cluster != "Hybrid") %>%
  ggplot(aes(PC1, color = Cluster, fill = Cluster)) + 
  geom_density(alpha = .3)+
  Color_Cluster + Fill_Cluster +
  theme_cowplot() + 
  theme(axis.title = element_blank(), axis.text = element_blank(),
        plot.margin = unit(c(0,0,0,0), "cm"), axis.ticks = element_blank())

p12 = filter(for_plot, Cluster != "Hybrid") %>%
  ggplot(aes(PC2, color = Cluster, fill = Cluster)) + 
  geom_density(alpha = .3)+
  Color_Cluster + Fill_Cluster +
  theme_cowplot() + 
  theme(axis.title = element_blank(), axis.text = element_blank(),
        plot.margin = unit(c(0,0,0,0), "cm"), axis.ticks = element_blank())

p13 = filter(for_plot, Cluster != "Hybrid") %>%
  ggplot(aes(PC3, color = Cluster, fill = Cluster)) + 
  geom_density(alpha = .3)+
  Color_Cluster + Fill_Cluster +
  theme_cowplot() + 
  theme(axis.title = element_blank(), axis.text = element_blank(),
        plot.margin = unit(c(0,0,0,0), "cm"), axis.ticks = element_blank())

cowplot::plot_grid(p11+ theme(legend.position = "none"), 
                   p12 + theme(legend.position = "none"), 
                   p13 + theme(legend.position = "none"),
                   ncol = 1, align = "hv") 


ggMarginal(ggplot(for_plot, aes(x = PC2, y= PC3, shape = Cluster)) +
  geom_point(aes(color = Cluster)) +
  labs(x = paste0("PC 2 (", round(pca$eigenval[2]*100/eigen_sum, 2), "%)"),
       y = paste0("PC 3 (", round(pca$eigenval[3]*100/eigen_sum, 2), "%)")) +
  Color_Cluster + Shape_Cluster +
  theme_bw() + theme(legend.position = "bottom"), groupColour = TRUE, groupFill = TRUE)

ggplot2::theme_set(theme_light())
p = ggpairs(pca2, columns = c(1:6),
            ggplot2::aes(col=Continent, fill = Continent, alpha = 0.6),
            title = "PCA based thinned SNPs",
            upper = list(continuous = "points", combo = "box_no_facet"))

for(i in 1:p$nrow) {
  for(j in 1:p$ncol){
    p[i,j] <- p[i,j] + theme_light() + Color_Continent + Fill_Continent
  }
}

p

Population trees

In the steps before, I have learned about population history indirectly by inferring genetic populations from the genomic data. The relationship between the population and the underlying demography is not explicit in these however. It is possible however to infer splits between populations and create a population tree. Here, I use treemix, which takes into account the possibility of gene flow between populations and indeed test of it in the process of creating a population tree.

Because the populations in the clustering were not perfectly distinct from one another, I start with “discretized” populations by choosing only the isolates with high ancestry in one of the sNMF clusters.


#With only Zymoseptoria tritici (but more markers)
~/Documents/Software/vcftools_jydu/src/cpp/vcftools \
  --vcf ${VCFDIR}$VCFNAME.recode.vcf \
  --keep ${POPSTR}$VCFNAME.high_anc_coef_snmf.ind \
  --remove-filtered-all --extract-FORMAT-info GT \
  --max-missing 1.0 --min-alleles 2 --max-alleles 2 \
  --maf 0.05 \
  --out ${POPSTR}$VCFNAME.high_anc_coef_snmf

cat  ${POPSTR}$VCFNAME.high_anc_coef_snmf.GT.FORMAT | cut -f 3- \
   >  ${POPSTR}$VCFNAME.high_anc_coef_snmf.GT.FORMAT2

#With only the positions found in the outgroup
~/Documents/Software/vcftools_jydu/src/cpp/vcftools \
   --vcf ${VCFDIR}$VCFNAME.recode.vcf \
   --keep ${POPSTR}$VCFNAME.high_anc_coef_snmf.ind \
   --remove-filtered-all --max-missing 1.0 --min-alleles 2 --max-alleles 2 --maf 0.05 \
   --positions ${VCFDIR}est_sfs_position_list.for_vcftools \
   --extract-FORMAT-info GT \
   --out ${POPSTR}High_anc_coef_only_outgroups_positions

In the treemix analysis, I want to use the sequences from the strains Zpa63 and Za17, respectively from the species Zymoseptoria passerinii and Zymoseptoria ardabilia, as outgroups. Here is how I went from the fully assembled sequences to the snps to add to the Z. tritici intra-species variants. There are also some data wrangling and table reformatting steps to get the variants ready for Treemix.

#For both Zpa63 and Za17
./dnadiff \
  -p /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63 \
  /data2/alice/WW_project/0_Data/Zymoseptoria_tritici.MG2.dna.toplevel.mt+.fa \
  /legserv/NGS_data/Zymoseptoria/Zt_Reference_genomes/Sister_species_Feurtey2020/Zpa63_softmasked_for_publication.fa 
  
./delta-filter \
   -1 /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63.delta \
   > /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63.filtered.delta
   
./show-coords -r -T /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63.filtered.delta | \
   awk 'BEGIN {OFS = "\t"} NR > 4 {print $8, $1, $2}' \
   > /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63.filtered.bed

# Getting the single alignments for the two species
~/Software/bedtools intersect \
   -a /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Za17.filtered.bed \
   -b /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63.filtered.bed \
   > /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Za17_filtered_insersect_Zpa63_filtered.bed

# Filtering the snps to keep only the ones in the good intervals
~/Software/bedtools intersect -a /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63.snps ^C

awk 'BEGIN {OFS = "\t"} {print $11, $1, $1, $2, $3}' \
    /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63.snps \
    > /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63.snps.bed ;  
    
echo -e "CHROM\tPOS\tREF\tZpa63" > /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63.filtered.snps
~/Software/bedtools intersect \
    -a /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63.snps.bed \
    -b /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Za17_filtered_insersect_Zpa63_filtered.bed \
    -wa  | cut -f 1,2,4,5 >> /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Zpa63.filtered.snps

awk 'BEGIN {OFS = "\t"} {print $11, $1, $1, $2, $3}' \
    /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Za17.snps \
    > /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Za17.snps.bed ; 
    
echo -e "CHROM\tPOS\tREF\tZa17" > /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Za17.filtered.snps
~/Software/bedtools intersect \
    -a /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Za17.snps.bed \
    -b /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Za17_filtered_insersect_Zpa63_filtered.bed \
    -wa | cut -f 1,2,4,5 >> /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Za17.filtered.snps

    
    
# Filtering the counts from the whole vcf to keep only the ones in the good intervals
#!/bin/bash 

source $1

${VCFTOOLS_PATH} \
     --gzvcf ${vcf_dir}${VCFBasename}.genotyped.ALL.filtered.clean.AB_filtered.variants.good_samples.max-m-80.vcf.gz  \
     --min-alleles 2 --max-alleles 2 --mac 1 --remove-indels \
     --out ${vcf_dir}${VCFBasename}.genotyped.ALL.filtered.clean.AB_filtered.variants.good_samples.max-m-80.biall_SNPs.intersect_Za_Zpa \
     --counts  --bed  /data2/alice/WW_project/1_Variant_calling/3_Per_sample_calling/Za17_filtered_insersect_Zpa63_filtered.bed
#Read outgroup SNPS
 temp = inner_join( read_delim(paste0(VAR_dir, "Za17.filtered.snps")) %>%
  filter(Za17 != "." & REF != ".") %>% distinct(),
  read_delim(paste0(VAR_dir, "Zpa63.filtered.snps")) %>%
  filter(Zpa63 != "." & REF != ".") %>% distinct()) 
 
#Merge to previously created GT table
 data_for_treemix = inner_join(read_tsv(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.alleles_info"), 
                                               col_names = c("CHROM", "POS","REF", "ALT")),
                               read_tsv(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.GT.FORMAT"))) %>%
   left_join(temp)
 
 
# This was simply to check that everything was matching as it should
#temp = cbind(read_delim(paste0(VAR_dir, "est_sfs_position_list.txt"), delim = "\t"),
#    read_delim(paste0(VAR_dir, "est_sfs_pval.txt"), delim = " ", skip = 7, 
#               col_names = c("Line_nb", "Conf_index", "Probability")) %>%
#      dplyr::select(Line_nb, Probability)) %>%
#mutate(Ancestral_allele = ifelse(Probability > 0.95, Major_allele, ifelse(Probability < 0.05, Minor_allele, "Uncertain"))) 
#data_for_treemix = data_for_treemix %>% left_join(temp)
 
# REmoving positions with third allele
# Changing the format to 0/1
data_for_treemix = data_for_treemix %>%
  filter(is.na(Za17) | Za17 == ALT) %>%
  filter(is.na(Zpa63) | Zpa63 == ALT) %>% 
  dplyr::mutate(Za17 = ifelse(is.na(Za17), 0, 1),
                Zpa63 = ifelse(is.na(Zpa63), 0, 1)) 


data_for_treemix %>% dplyr::select(-CHROM, -POS, -REF, -ALT) %>%
  write_delim(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.with_outgroups.GT.FORMAT2"), delim = "\t")
#conda activate r-reticulate
from collections import defaultdict

#For each isolate, store its pop (as in sampling site) in a dictionary
dict_pop = dict(zip(list(r.high_anc_coef_snmf["Sample"]) + ["Za17", "Zpa63"],
    list(r.high_anc_coef_snmf["Cluster"]) + ["Za17", "Zpa63"]))

#Keep a list of the pop names/coordinates to write in the same order later
all_pops = sorted(list(set(list(r.high_anc_coef_snmf["Cluster"])+ ["Za17", "Zpa63"])))
#out_name = r.PopStr_dir + r.vcf_name + ".high_anc_coef_snmf.treemix"
out_name =  "/Users/afeurtey/Documents/Postdoc_Bruce/Projects/WW_project/2_Population_structure/High_anc_coef_only_outgroups_positions.with_outgroups.treemix"

out = open(out_name, "w")
shutup = out.write(" ".join(all_pops) + "\n")

#with open(r.PopStr_dir + r.vcf_name + ".high_anc_coef_snmf.GT.FORMAT2", "r") as input_snps :
with open("/Users/afeurtey/Documents/Postdoc_Bruce/Projects/WW_project/2_Population_structure/High_anc_coef_only_outgroups_positions.with_outgroups.GT.FORMAT2", "r") as input_snps :
  for i, snp in enumerate(input_snps) :
    
    #Setting two dictionaries with values at 0
    dict_snp0 = defaultdict(int)
    dict_snp1 = defaultdict(int)
    Lets_write = True
    
    #The first line is the name of the isolates
    if i == 0 :
      indv = snp.strip().split("\t")
      Lets_write = False
    else :
      #Keeping isolate name and allelic value together
      alleles = zip(indv, snp.strip().split("\t"))
            
      #...and counting the O and 1 based on the pop
      for ind, allele in alleles:
        if allele == "0" :
          dict_snp0[dict_pop[ind]] += 1
        elif allele == "1" :
          dict_snp1[dict_pop[ind]] += 1
        else :
          print("Only biallelic please!!!!")
          Lets_write = False
    #If I have not found anything weird, I will write the result to the output file.
    if Lets_write :
      shutup = out.write(" ".join([",".join([str(dict_snp0[pop]), str(dict_snp1[pop])])  for pop in all_pops]) + "\n")


print("All done!")
out.close()

Once the tables are properly formatted, it’s time to run the treemix software and to graphically explore the results.

POPSTR="/Users/afeurtey/Documents/Postdoc_Bruce/Projects/WW_project/2_Population_structure/"



#With outgroup
if [ -f ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups ] ;
then
  gzip ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.treemix
fi

treemix \
  -i ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.treemix.gz \
  -o ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.m0.treemix.out \
  -m 0 -root Zpa63
treemix \
  -i ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.treemix.gz \
  -o ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.m1.treemix.out \
  -m 1 -root Zpa63
treemix \
  -i ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.treemix.gz \
  -o ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.m2.treemix.out \
  -m 2 -root Zpa63
treemix \
  -i ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.treemix.gz \
  -o ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.m3.treemix.out \
  -m 3 -root Zpa63
treemix \
  -i ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.treemix.gz \
  -o ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.m4.treemix.out \
  -m 4 -root Zpa63
treemix \
  -i ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.treemix.gz \
  -o ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.m5.treemix.out \
  -m 5 -root Zpa63
treemix \
  -i ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.treemix.gz \
  -o ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.m6.treemix.out \
  -m 6 -root Zpa63

treemix \
  -i ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.treemix.gz \
  -o ${POPSTR}High_anc_coef_only_outgroups_positions.with_outgroups.m7.treemix.out \
  -m 7 -root Zpa63
source("~/Documents/Software/treemix-1.13/src/plotting_funcs.R")
#p_cluster
t = plot_tree(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.with_outgroups.m0.treemix.out"))
##     V1    V2       V3      V4      V5  V6  V7 V8  V9 V10
## 1    1 Zpa63 NOT_ROOT NOT_MIG     TIP 305  NA NA  NA  NA
## 2    2  <NA> NOT_ROOT NOT_MIG NOT_TIP 305   4  1  32  11
## 3    3    V6 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 4    4  Za17 NOT_ROOT NOT_MIG     TIP   2  NA NA  NA  NA
## 5   15    V7 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 6   16  <NA> NOT_ROOT NOT_MIG NOT_TIP 172  15  1   3   1
## 7   31    V4 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 8   32  <NA> NOT_ROOT NOT_MIG NOT_TIP   2 172  3 104   8
## 9   51   V10 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 10  52  <NA> NOT_ROOT NOT_MIG NOT_TIP 104  76  2 256   4
## 11  75    V8 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 12  76  <NA> NOT_ROOT NOT_MIG NOT_TIP  52  51  1  75   1
## 13 103    V9 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 14 104  <NA> NOT_ROOT NOT_MIG NOT_TIP  32  52  6 304   2
## 15 135    V1 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 16 136  <NA> NOT_ROOT NOT_MIG NOT_TIP 212  31  1 135   1
## 17 171   V11 NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 18 172  <NA> NOT_ROOT NOT_MIG NOT_TIP  32  16  2 171   1
## 19 211    V3 NOT_ROOT NOT_MIG     TIP 212  NA NA  NA  NA
## 20 212  <NA> NOT_ROOT NOT_MIG NOT_TIP 256 136  2 211   1
## 21 255    V2 NOT_ROOT NOT_MIG     TIP 256  NA NA  NA  NA
## 22 256  <NA> NOT_ROOT NOT_MIG NOT_TIP  52 212  3 255   1
## 23 303    V5 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 24 304  <NA> NOT_ROOT NOT_MIG NOT_TIP 104 303  1 103   1
## 25 305  <NA>     ROOT NOT_MIG NOT_TIP 305   1  1   2  12
##                                                                                                                                                                                                                                                                                                                     V11
## 1                                                                                                                                                                                                                                                                                                       Zpa63:5.733e-07
## 2                     (Za17:1.1466e-06,(((V7:0.0253622,V6:0.00582189):0.011378,V11:0.0234466):0.00913495,(((V10:0.0294537,V8:0.0457607):0.00834699,(((V4:0.0575342,V1:0.0439875):0.0107982,V3:0.0339801):0.0115416,V2:0):0.00477896):0.00342986,(V5:0.0106011,V9:0.0319032):0.00930602):0.00846123):0.182701):5.733e-07
## 3                                                                                                                                                                                                                                                                                                         V6:0.00582189
## 4                                                                                                                                                                                                                                                                                                       Za17:1.1466e-06
## 5                                                                                                                                                                                                                                                                                                          V7:0.0253622
## 6                                                                                                                                                                                                                                                                                 (V7:0.0253622,V6:0.00582189):0.011378
## 7                                                                                                                                                                                                                                                                                                          V4:0.0575342
## 8                                                 (((V7:0.0253622,V6:0.00582189):0.011378,V11:0.0234466):0.00913495,(((V10:0.0294537,V8:0.0457607):0.00834699,(((V4:0.0575342,V1:0.0439875):0.0107982,V3:0.0339801):0.0115416,V2:0):0.00477896):0.00342986,(V5:0.0106011,V9:0.0319032):0.00930602):0.00846123):0.182701
## 9                                                                                                                                                                                                                                                                                                         V10:0.0294537
## 10                                                                                                                                                                                ((V10:0.0294537,V8:0.0457607):0.00834699,(((V4:0.0575342,V1:0.0439875):0.0107982,V3:0.0339801):0.0115416,V2:0):0.00477896):0.00342986
## 11                                                                                                                                                                                                                                                                                                         V8:0.0457607
## 12                                                                                                                                                                                                                                                                              (V10:0.0294537,V8:0.0457607):0.00834699
## 13                                                                                                                                                                                                                                                                                                         V9:0.0319032
## 14                                                                                                                            (((V10:0.0294537,V8:0.0457607):0.00834699,(((V4:0.0575342,V1:0.0439875):0.0107982,V3:0.0339801):0.0115416,V2:0):0.00477896):0.00342986,(V5:0.0106011,V9:0.0319032):0.00930602):0.00846123
## 15                                                                                                                                                                                                                                                                                                         V1:0.0439875
## 16                                                                                                                                                                                                                                                                                (V4:0.0575342,V1:0.0439875):0.0107982
## 17                                                                                                                                                                                                                                                                                                        V11:0.0234466
## 18                                                                                                                                                                                                                                                     ((V7:0.0253622,V6:0.00582189):0.011378,V11:0.0234466):0.00913495
## 19                                                                                                                                                                                                                                                                                                         V3:0.0339801
## 20                                                                                                                                                                                                                                                       ((V4:0.0575342,V1:0.0439875):0.0107982,V3:0.0339801):0.0115416
## 21                                                                                                                                                                                                                                                                                                                 V2:0
## 22                                                                                                                                                                                                                                     (((V4:0.0575342,V1:0.0439875):0.0107982,V3:0.0339801):0.0115416,V2:0):0.00477896
## 23                                                                                                                                                                                                                                                                                                         V5:0.0106011
## 24                                                                                                                                                                                                                                                                               (V5:0.0106011,V9:0.0319032):0.00930602
## 25 (Zpa63:5.733e-07,(Za17:1.1466e-06,(((V7:0.0253622,V6:0.00582189):0.011378,V11:0.0234466):0.00913495,(((V10:0.0294537,V8:0.0457607):0.00834699,(((V4:0.0575342,V1:0.0439875):0.0107982,V3:0.0339801):0.0115416,V2:0):0.00477896):0.00342986,(V5:0.0106011,V9:0.0319032):0.00930602):0.00846123):0.182701):5.733e-07);
##               x          y       ymin       ymax
## 1  0.0000005733 0.96153846 0.92307692 1.00000000
## 2  0.0000005733 0.84615385 0.00000000 0.92307692
## 3  0.2090364133 0.73076923 0.69230769 0.76923077
## 4  0.0000017199 0.88461538 0.84615385 0.92307692
## 5  0.2285767233 0.80769231 0.76923077 0.84615385
## 6  0.2032145233 0.76923077 0.69230769 0.84615385
## 7  0.2792456233 0.42307692 0.38461538 0.46153846
## 8  0.1827015733 0.61538462 0.00000000 0.84615385
## 9  0.2323933533 0.57692308 0.53846154 0.61538462
## 10 0.1945926633 0.46153846 0.15384615 0.61538462
## 11 0.2487003533 0.50000000 0.46153846 0.53846154
## 12 0.2029396533 0.53846154 0.46153846 0.61538462
## 13 0.2323720233 0.03846154 0.00000000 0.07692308
## 14 0.1911628033 0.15384615 0.00000000 0.61538462
## 15 0.2656989233 0.34615385 0.30769231 0.38461538
## 16 0.2217114233 0.38461538 0.30769231 0.46153846
## 17 0.2152831233 0.65384615 0.61538462 0.69230769
## 18 0.1918365233 0.69230769 0.61538462 0.84615385
## 19 0.2448933233 0.26923077 0.23076923 0.30769231
## 20 0.2109132233 0.30769231 0.23076923 0.46153846
## 21 0.1993716233 0.19230769 0.15384615 0.23076923
## 22 0.1993716233 0.23076923 0.15384615 0.46153846
## 23 0.2110699233 0.11538462 0.07692308 0.15384615
## 24 0.2004688233 0.07692308 0.00000000 0.15384615
## 25 0.0000000000 0.92307692 0.00000000 1.00000000

##  [1] 0.0000005733 0.2090364133 0.0000017199 0.2285767233 0.2792456233
##  [6] 0.2323933533 0.2487003533 0.2323720233 0.2656989233 0.2152831233
## [11] 0.2448933233 0.1993716233 0.2110699233
## [1] 0.003
## [1] "mse 0.000965112171597633"
t = plot_tree(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.with_outgroups.m1.treemix.out"))
##     V1    V2       V3      V4      V5  V6  V7 V8  V9 V10
## 1    1 Zpa63 NOT_ROOT NOT_MIG     TIP 305  NA NA  NA  NA
## 2    2  <NA> NOT_ROOT NOT_MIG NOT_TIP 305   4  1  32  11
## 3    3    V6 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 4    4  Za17 NOT_ROOT NOT_MIG     TIP   2  NA NA  NA  NA
## 5   15    V7 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 6   16  <NA> NOT_ROOT NOT_MIG NOT_TIP 172  15  1   3   1
## 7   31    V4 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 8   32  <NA> NOT_ROOT NOT_MIG NOT_TIP   2 104  8 172   3
## 9   51   V10 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 10  52  <NA> NOT_ROOT NOT_MIG NOT_TIP 104  76  2 256   4
## 11  75    V8 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 12  76  <NA> NOT_ROOT NOT_MIG NOT_TIP  52  51  1  75   1
## 13 103    V9 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 14 104  <NA> NOT_ROOT NOT_MIG NOT_TIP  32  52  6 304   2
## 15 135    V1 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 16 136  <NA> NOT_ROOT NOT_MIG NOT_TIP 212  31  1 135   1
## 17 171   V11 NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 18 172  <NA> NOT_ROOT NOT_MIG NOT_TIP  32  16  2 171   1
## 19 211    V3 NOT_ROOT NOT_MIG     TIP 212  NA NA  NA  NA
## 20 212  <NA> NOT_ROOT NOT_MIG NOT_TIP 256 211  1 136   2
## 21 255    V2 NOT_ROOT NOT_MIG     TIP 256  NA NA  NA  NA
## 22 256  <NA> NOT_ROOT NOT_MIG NOT_TIP  52 255  1 212   3
## 23 303    V5 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 24 304  <NA> NOT_ROOT NOT_MIG NOT_TIP 104 303  1 103   1
## 25 305  <NA>     ROOT NOT_MIG NOT_TIP 305   1  1   2  12
## 26 321  <NA> NOT_ROOT     MIG NOT_TIP  32 172 NA  NA  NA
##                                                                                                                                                                                                                                                                                                                    V11
## 1                                                                                                                                                                                                                                                                                                      Zpa63:5.733e-07
## 2                     (Za17:1.1466e-06,((((V10:0.0138305,V8:0.181607):0.0229393,(V2:0,(V3:0.0339801,(V4:0.0575342,V1:0.0439875):0.0107982):0.0115416):0.000971518):0.00674799,(V5:0.0106011,V9:0.0319032):0.00826177):0.00947709,((V7:0.0253622,V6:0.00582189):0.011378,V11:0.0234466):0.00951656):0.182304):5.733e-07
## 3                                                                                                                                                                                                                                                                                                        V6:0.00582189
## 4                                                                                                                                                                                                                                                                                                      Za17:1.1466e-06
## 5                                                                                                                                                                                                                                                                                                         V7:0.0253622
## 6                                                                                                                                                                                                                                                                                (V7:0.0253622,V6:0.00582189):0.011378
## 7                                                                                                                                                                                                                                                                                                         V4:0.0575342
## 8                                                 ((((V10:0.0138305,V8:0.181607):0.0229393,(V2:0,(V3:0.0339801,(V4:0.0575342,V1:0.0439875):0.0107982):0.0115416):0.000971518):0.00674799,(V5:0.0106011,V9:0.0319032):0.00826177):0.00947709,((V7:0.0253622,V6:0.00582189):0.011378,V11:0.0234466):0.00951656):0.182304
## 9                                                                                                                                                                                                                                                                                                        V10:0.0138305
## 10                                                                                                                                                                                ((V10:0.0138305,V8:0.181607):0.0229393,(V2:0,(V3:0.0339801,(V4:0.0575342,V1:0.0439875):0.0107982):0.0115416):0.000971518):0.00674799
## 11                                                                                                                                                                                                                                                                                                         V8:0.181607
## 12                                                                                                                                                                                                                                                                               (V10:0.0138305,V8:0.181607):0.0229393
## 13                                                                                                                                                                                                                                                                                                        V9:0.0319032
## 14                                                                                                                            (((V10:0.0138305,V8:0.181607):0.0229393,(V2:0,(V3:0.0339801,(V4:0.0575342,V1:0.0439875):0.0107982):0.0115416):0.000971518):0.00674799,(V5:0.0106011,V9:0.0319032):0.00826177):0.00947709
## 15                                                                                                                                                                                                                                                                                                        V1:0.0439875
## 16                                                                                                                                                                                                                                                                               (V4:0.0575342,V1:0.0439875):0.0107982
## 17                                                                                                                                                                                                                                                                                                       V11:0.0234466
## 18                                                                                                                                                                                                                                                    ((V7:0.0253622,V6:0.00582189):0.011378,V11:0.0234466):0.00951656
## 19                                                                                                                                                                                                                                                                                                        V3:0.0339801
## 20                                                                                                                                                                                                                                                      (V3:0.0339801,(V4:0.0575342,V1:0.0439875):0.0107982):0.0115416
## 21                                                                                                                                                                                                                                                                                                                V2:0
## 22                                                                                                                                                                                                                                   (V2:0,(V3:0.0339801,(V4:0.0575342,V1:0.0439875):0.0107982):0.0115416):0.000971518
## 23                                                                                                                                                                                                                                                                                                        V5:0.0106011
## 24                                                                                                                                                                                                                                                                              (V5:0.0106011,V9:0.0319032):0.00826177
## 25 (Zpa63:5.733e-07,(Za17:1.1466e-06,((((V10:0.0138305,V8:0.181607):0.0229393,(V2:0,(V3:0.0339801,(V4:0.0575342,V1:0.0439875):0.0107982):0.0115416):0.000971518):0.00674799,(V5:0.0106011,V9:0.0319032):0.00826177):0.00947709,((V7:0.0253622,V6:0.00582189):0.011378,V11:0.0234466):0.00951656):0.182304):5.733e-07);
## 26                                                                                                                                                                                                                                                                                                                <NA>
##               x          y       ymin       ymax
## 1  0.0000005733 0.96153846 0.92307692 1.00000000
## 2  0.0000005733 0.84615385 0.00000000 0.92307692
## 3  0.2090210233 0.11538462 0.07692308 0.15384615
## 4  0.0000017199 0.88461538 0.84615385 0.92307692
## 5  0.2285613333 0.19230769 0.15384615 0.23076923
## 6  0.2031991333 0.15384615 0.07692308 0.23076923
## 7  0.2793751713 0.50000000 0.46153846 0.53846154
## 8  0.1823045733 0.23076923 0.00000000 0.84615385
## 9  0.2352994533 0.80769231 0.76923077 0.84615385
## 10 0.1985296533 0.69230769 0.38461538 0.84615385
## 11 0.2728008709 0.73076923 0.69230769 0.76923077
## 12 0.2214689533 0.76923077 0.69230769 0.84615385
## 13 0.2319466333 0.26923077 0.23076923 0.30769231
## 14 0.1917816633 0.38461538 0.23076923 0.84615385
## 15 0.2658284713 0.42307692 0.38461538 0.46153846
## 16 0.2218409713 0.46153846 0.38461538 0.53846154
## 17 0.2152677333 0.03846154 0.00000000 0.07692308
## 18 0.1918211333 0.07692308 0.00000000 0.23076923
## 19 0.2450228713 0.57692308 0.53846154 0.61538462
## 20 0.2110427713 0.53846154 0.38461538 0.61538462
## 21 0.1995011713 0.65384615 0.61538462 0.69230769
## 22 0.1995011713 0.61538462 0.38461538 0.69230769
## 23 0.2106445333 0.34615385 0.30769231 0.38461538
## 24 0.2000434333 0.30769231 0.23076923 0.38461538
## 25 0.0000000000 0.92307692 0.00000000 1.00000000
## 26 0.1889662133         NA 0.00000000 0.23076923
## [1] "0.700005 0.1823045733 0.1918211333"

##  [1] 0.0000005733 0.2090210233 0.0000017199 0.2285613333 0.2793751713
##  [6] 0.2352994533 0.2728008709 0.2319466333 0.2658284713 0.2152677333
## [11] 0.2450228713 0.1995011713 0.2106445333
## [1] 0.003
## [1] "mse 0.000965112171597633"
t = plot_tree(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.with_outgroups.m2.treemix.out"))
##     V1    V2       V3      V4      V5  V6  V7 V8  V9 V10
## 1    1 Zpa63 NOT_ROOT NOT_MIG     TIP 305  NA NA  NA  NA
## 2    2  <NA> NOT_ROOT NOT_MIG NOT_TIP 305   4  1  32  11
## 3    3    V6 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 4    4  Za17 NOT_ROOT NOT_MIG     TIP   2  NA NA  NA  NA
## 5   15    V7 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 6   16  <NA> NOT_ROOT NOT_MIG NOT_TIP 172  15  1   3   1
## 7   31    V4 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 8   32  <NA> NOT_ROOT NOT_MIG NOT_TIP   2 172  3 104   8
## 9   51   V10 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 10  52  <NA> NOT_ROOT NOT_MIG NOT_TIP 104  76  2 256   4
## 11  75    V8 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 12  76  <NA> NOT_ROOT NOT_MIG NOT_TIP  52  51  1  75   1
## 13 103    V9 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 14 104  <NA> NOT_ROOT NOT_MIG NOT_TIP  32  52  6 304   2
## 15 135    V1 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 16 136  <NA> NOT_ROOT NOT_MIG NOT_TIP 212 135  1  31   1
## 17 171   V11 NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 18 172  <NA> NOT_ROOT NOT_MIG NOT_TIP  32  16  2 171   1
## 19 211    V3 NOT_ROOT NOT_MIG     TIP 212  NA NA  NA  NA
## 20 212  <NA> NOT_ROOT NOT_MIG NOT_TIP 256 136  2 211   1
## 21 255    V2 NOT_ROOT NOT_MIG     TIP 256  NA NA  NA  NA
## 22 256  <NA> NOT_ROOT NOT_MIG NOT_TIP  52 255  1 212   3
## 23 303    V5 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 24 304  <NA> NOT_ROOT NOT_MIG NOT_TIP 104 303  1 103   1
## 25 305  <NA>     ROOT NOT_MIG NOT_TIP 305   1  1   2  12
## 26 321  <NA> NOT_ROOT     MIG NOT_TIP  32 172 NA  NA  NA
## 27 365  <NA> NOT_ROOT     MIG NOT_TIP 136  31 NA  NA  NA
##                                                                                                                                                                                                                                                                                                                   V11
## 1                                                                                                                                                                                                                                                                                                   Zpa63:1.13067e-06
## 2                       (Za17:2.26134e-06,(((V7:0.0253633,V6:0.005823):0.011378,V11:0.0234477):0.00952983,(((V10:0.0139363,V8:0.1872):0.0223637,(V2:0,((V1:0.0435682,V4:0.0579283):0.0111987,V3:0.033596):0.0119633):0.00114129):0.00837946,(V5:0.00707124,V9:0.0461891):0.0118319):0.00751057):0.182401):1.13067e-06
## 3                                                                                                                                                                                                                                                                                                         V6:0.005823
## 4                                                                                                                                                                                                                                                                                                    Za17:2.26134e-06
## 5                                                                                                                                                                                                                                                                                                        V7:0.0253633
## 6                                                                                                                                                                                                                                                                                 (V7:0.0253633,V6:0.005823):0.011378
## 7                                                                                                                                                                                                                                                                                                        V4:0.0579283
## 8                                                      (((V7:0.0253633,V6:0.005823):0.011378,V11:0.0234477):0.00952983,(((V10:0.0139363,V8:0.1872):0.0223637,(V2:0,((V1:0.0435682,V4:0.0579283):0.0111987,V3:0.033596):0.0119633):0.00114129):0.00837946,(V5:0.00707124,V9:0.0461891):0.0118319):0.00751057):0.182401
## 9                                                                                                                                                                                                                                                                                                       V10:0.0139363
## 10                                                                                                                                                                                   ((V10:0.0139363,V8:0.1872):0.0223637,(V2:0,((V1:0.0435682,V4:0.0579283):0.0111987,V3:0.033596):0.0119633):0.00114129):0.00837946
## 11                                                                                                                                                                                                                                                                                                          V8:0.1872
## 12                                                                                                                                                                                                                                                                                (V10:0.0139363,V8:0.1872):0.0223637
## 13                                                                                                                                                                                                                                                                                                       V9:0.0461891
## 14                                                                                                                               (((V10:0.0139363,V8:0.1872):0.0223637,(V2:0,((V1:0.0435682,V4:0.0579283):0.0111987,V3:0.033596):0.0119633):0.00114129):0.00837946,(V5:0.00707124,V9:0.0461891):0.0118319):0.00751057
## 15                                                                                                                                                                                                                                                                                                       V1:0.0435682
## 16                                                                                                                                                                                                                                                                              (V1:0.0435682,V4:0.0579283):0.0111987
## 17                                                                                                                                                                                                                                                                                                      V11:0.0234477
## 18                                                                                                                                                                                                                                                     ((V7:0.0253633,V6:0.005823):0.011378,V11:0.0234477):0.00952983
## 19                                                                                                                                                                                                                                                                                                        V3:0.033596
## 20                                                                                                                                                                                                                                                      ((V1:0.0435682,V4:0.0579283):0.0111987,V3:0.033596):0.0119633
## 21                                                                                                                                                                                                                                                                                                               V2:0
## 22                                                                                                                                                                                                                                    (V2:0,((V1:0.0435682,V4:0.0579283):0.0111987,V3:0.033596):0.0119633):0.00114129
## 23                                                                                                                                                                                                                                                                                                      V5:0.00707124
## 24                                                                                                                                                                                                                                                                             (V5:0.00707124,V9:0.0461891):0.0118319
## 25 (Zpa63:1.13067e-06,(Za17:2.26134e-06,(((V7:0.0253633,V6:0.005823):0.011378,V11:0.0234477):0.00952983,(((V10:0.0139363,V8:0.1872):0.0223637,(V2:0,((V1:0.0435682,V4:0.0579283):0.0111987,V3:0.033596):0.0119633):0.00114129):0.00837946,(V5:0.00707124,V9:0.0461891):0.0118319):0.00751057):0.182401):1.13067e-06);
## 26                                                                                                                                                                                                                                                                                                               <NA>
## 27                                                                                                                                                                                                                                                                                                               <NA>
##               x          y       ymin       ymax
## 1  1.130670e-06 0.96153846 0.92307692 1.00000000
## 2  1.130670e-06 0.84615385 0.00000000 0.92307692
## 3  2.091330e-01 0.73076923 0.69230769 0.76923077
## 4  3.392010e-06 0.88461538 0.84615385 0.92307692
## 5  2.286733e-01 0.80769231 0.76923077 0.84615385
## 6  2.033100e-01 0.76923077 0.69230769 0.84615385
## 7  2.805239e-01 0.26923077 0.23076923 0.30769231
## 8  1.824021e-01 0.61538462 0.00000000 0.84615385
## 9  2.345922e-01 0.57692308 0.53846154 0.61538462
## 10 1.982922e-01 0.46153846 0.15384615 0.61538462
## 11 2.719467e-01 0.50000000 0.46153846 0.53846154
## 12 2.206559e-01 0.53846154 0.46153846 0.61538462
## 13 2.356191e-01 0.03846154 0.00000000 0.07692308
## 14 1.899127e-01 0.15384615 0.00000000 0.61538462
## 15 2.661637e-01 0.34615385 0.30769231 0.38461538
## 16 2.225955e-01 0.30769231 0.23076923 0.38461538
## 17 2.153797e-01 0.65384615 0.61538462 0.69230769
## 18 1.919320e-01 0.69230769 0.61538462 0.84615385
## 19 2.449928e-01 0.19230769 0.15384615 0.23076923
## 20 2.113968e-01 0.23076923 0.15384615 0.38461538
## 21 1.994335e-01 0.42307692 0.38461538 0.46153846
## 22 1.994335e-01 0.38461538 0.15384615 0.46153846
## 23 2.088158e-01 0.11538462 0.07692308 0.15384615
## 24 2.017446e-01 0.07692308 0.00000000 0.15384615
## 25 0.000000e+00 0.92307692 0.00000000 1.00000000
## 26 1.881335e-01         NA 0.00000000 0.61538462
## 27 2.537494e-01         NA 0.23076923 0.30769231
## [1] "0.601412 0.18240213067 0.19193197067"
## [1] "0.5378 0.22259545067 0.28052385067"

##  [1] 1.130670e-06 2.091330e-01 3.392010e-06 2.286733e-01 2.805239e-01
##  [6] 2.345922e-01 2.719467e-01 2.356191e-01 2.661637e-01 2.153797e-01
## [11] 2.449928e-01 1.994335e-01 2.088158e-01
## [1] 0.003
## [1] "mse 0.000965112171597633"
t = plot_tree(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.with_outgroups.m3.treemix.out"))
##     V1    V2       V3      V4      V5  V6  V7 V8  V9 V10
## 1    1 Zpa63 NOT_ROOT NOT_MIG     TIP 305  NA NA  NA  NA
## 2    2  <NA> NOT_ROOT NOT_MIG NOT_TIP 305   4  1  32  11
## 3    3    V6 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 4    4  Za17 NOT_ROOT NOT_MIG     TIP   2  NA NA  NA  NA
## 5   15    V7 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 6   16  <NA> NOT_ROOT NOT_MIG NOT_TIP 172  15  1   3   1
## 7   31    V4 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 8   32  <NA> NOT_ROOT NOT_MIG NOT_TIP   2 172  3 104   8
## 9   51   V10 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 10  52  <NA> NOT_ROOT NOT_MIG NOT_TIP 104  76  2 256   4
## 11  75    V8 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 12  76  <NA> NOT_ROOT NOT_MIG NOT_TIP  52  51  1  75   1
## 13 103    V9 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 14 104  <NA> NOT_ROOT NOT_MIG NOT_TIP  32  52  6 304   2
## 15 135    V1 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 16 136  <NA> NOT_ROOT NOT_MIG NOT_TIP 212 135  1  31   1
## 17 171   V11 NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 18 172  <NA> NOT_ROOT NOT_MIG NOT_TIP  32  16  2 171   1
## 19 211    V3 NOT_ROOT NOT_MIG     TIP 212  NA NA  NA  NA
## 20 212  <NA> NOT_ROOT NOT_MIG NOT_TIP 256 136  2 211   1
## 21 255    V2 NOT_ROOT NOT_MIG     TIP 256  NA NA  NA  NA
## 22 256  <NA> NOT_ROOT NOT_MIG NOT_TIP  52 255  1 212   3
## 23 303    V5 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 24 304  <NA> NOT_ROOT NOT_MIG NOT_TIP 104 303  1 103   1
## 25 305  <NA>     ROOT NOT_MIG NOT_TIP 305   1  1   2  12
## 26 321  <NA> NOT_ROOT     MIG NOT_TIP  32 172 NA  NA  NA
## 27 365  <NA> NOT_ROOT     MIG NOT_TIP 136  31 NA  NA  NA
## 28 404  <NA> NOT_ROOT     MIG NOT_TIP 136  31 NA  NA  NA
##                                                                                                                                                                                                                                                                                                                  V11
## 1                                                                                                                                                                                                                                                                                                  Zpa63:1.16913e-06
## 2                       (Za17:2.33828e-06,(((V7:0.0253634,V6:0.00582308):0.011378,V11:0.0234478):0.0095298,(((V10:0.0139909,V8:0.186654):0.0222929,(V2:0,((V1:0.0425648,V4:0.058998):0.019938,V3:0.0589117):0.00321):0.00114881):0.00836871,(V5:0.00723673,V9:0.0448731):0.011667):0.00751525):0.182401):1.16913e-06
## 3                                                                                                                                                                                                                                                                                                      V6:0.00582308
## 4                                                                                                                                                                                                                                                                                                   Za17:2.33828e-06
## 5                                                                                                                                                                                                                                                                                                       V7:0.0253634
## 6                                                                                                                                                                                                                                                                              (V7:0.0253634,V6:0.00582308):0.011378
## 7                                                                                                                                                                                                                                                                                                        V4:0.058998
## 8                                                      (((V7:0.0253634,V6:0.00582308):0.011378,V11:0.0234478):0.0095298,(((V10:0.0139909,V8:0.186654):0.0222929,(V2:0,((V1:0.0425648,V4:0.058998):0.019938,V3:0.0589117):0.00321):0.00114881):0.00836871,(V5:0.00723673,V9:0.0448731):0.011667):0.00751525):0.182401
## 9                                                                                                                                                                                                                                                                                                      V10:0.0139909
## 10                                                                                                                                                                                   ((V10:0.0139909,V8:0.186654):0.0222929,(V2:0,((V1:0.0425648,V4:0.058998):0.019938,V3:0.0589117):0.00321):0.00114881):0.00836871
## 11                                                                                                                                                                                                                                                                                                       V8:0.186654
## 12                                                                                                                                                                                                                                                                             (V10:0.0139909,V8:0.186654):0.0222929
## 13                                                                                                                                                                                                                                                                                                      V9:0.0448731
## 14                                                                                                                                (((V10:0.0139909,V8:0.186654):0.0222929,(V2:0,((V1:0.0425648,V4:0.058998):0.019938,V3:0.0589117):0.00321):0.00114881):0.00836871,(V5:0.00723673,V9:0.0448731):0.011667):0.00751525
## 15                                                                                                                                                                                                                                                                                                      V1:0.0425648
## 16                                                                                                                                                                                                                                                                               (V1:0.0425648,V4:0.058998):0.019938
## 17                                                                                                                                                                                                                                                                                                     V11:0.0234478
## 18                                                                                                                                                                                                                                                   ((V7:0.0253634,V6:0.00582308):0.011378,V11:0.0234478):0.0095298
## 19                                                                                                                                                                                                                                                                                                      V3:0.0589117
## 20                                                                                                                                                                                                                                                        ((V1:0.0425648,V4:0.058998):0.019938,V3:0.0589117):0.00321
## 21                                                                                                                                                                                                                                                                                                              V2:0
## 22                                                                                                                                                                                                                                      (V2:0,((V1:0.0425648,V4:0.058998):0.019938,V3:0.0589117):0.00321):0.00114881
## 23                                                                                                                                                                                                                                                                                                     V5:0.00723673
## 24                                                                                                                                                                                                                                                                             (V5:0.00723673,V9:0.0448731):0.011667
## 25 (Zpa63:1.16913e-06,(Za17:2.33828e-06,(((V7:0.0253634,V6:0.00582308):0.011378,V11:0.0234478):0.0095298,(((V10:0.0139909,V8:0.186654):0.0222929,(V2:0,((V1:0.0425648,V4:0.058998):0.019938,V3:0.0589117):0.00321):0.00114881):0.00836871,(V5:0.00723673,V9:0.0448731):0.011667):0.00751525):0.182401):1.16913e-06);
## 26                                                                                                                                                                                                                                                                                                              <NA>
## 27                                                                                                                                                                                                                                                                                                              <NA>
## 28                                                                                                                                                                                                                                                                                                              <NA>
##               x          y       ymin       ymax
## 1  1.169130e-06 0.96153846 0.92307692 1.00000000
## 2  1.169130e-06 0.84615385 0.00000000 0.92307692
## 3  2.091330e-01 0.73076923 0.69230769 0.76923077
## 4  3.507410e-06 0.88461538 0.84615385 0.92307692
## 5  2.286734e-01 0.80769231 0.76923077 0.84615385
## 6  2.033100e-01 0.76923077 0.69230769 0.84615385
## 7  2.815809e-01 0.26923077 0.23076923 0.30769231
## 8  1.824022e-01 0.61538462 0.00000000 0.84615385
## 9  2.345699e-01 0.57692308 0.53846154 0.61538462
## 10 1.982861e-01 0.46153846 0.15384615 0.61538462
## 11 2.718618e-01 0.50000000 0.46153846 0.53846154
## 12 2.205790e-01 0.53846154 0.46153846 0.61538462
## 13 2.353702e-01 0.03846154 0.00000000 0.07692308
## 14 1.899174e-01 0.15384615 0.00000000 0.61538462
## 15 2.651477e-01 0.34615385 0.30769231 0.38461538
## 16 2.225829e-01 0.30769231 0.23076923 0.38461538
## 17 2.153798e-01 0.65384615 0.61538462 0.69230769
## 18 1.919320e-01 0.69230769 0.61538462 0.84615385
## 19 2.425220e-01 0.19230769 0.15384615 0.23076923
## 20 2.026449e-01 0.23076923 0.15384615 0.38461538
## 21 1.994349e-01 0.42307692 0.38461538 0.46153846
## 22 1.994349e-01 0.38461538 0.15384615 0.46153846
## 23 2.088211e-01 0.11538462 0.07692308 0.15384615
## 24 2.015844e-01 0.07692308 0.00000000 0.15384615
## 25 0.000000e+00 0.92307692 0.00000000 1.00000000
## 26 1.881391e-01         NA 0.00000000 0.61538462
## 27 2.630530e-01         NA 0.23076923 0.30769231
## 28 2.815809e-01         NA 0.23076923 0.30769231
## [1] "0.602 0.18240216913 0.19193196913"
## [1] "0.685957 0.22258293913 0.28158093913"
## [1] "1 0.22258293913 0.28158093913"

##  [1] 1.169130e-06 2.091330e-01 3.507410e-06 2.286734e-01 2.815809e-01
##  [6] 2.345699e-01 2.718618e-01 2.353702e-01 2.651477e-01 2.153798e-01
## [11] 2.425220e-01 1.994349e-01 2.088211e-01
## [1] 0.003
## [1] "mse 0.000965112171597633"
t = plot_tree(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.with_outgroups.m4.treemix.out"))
##     V1    V2       V3      V4      V5  V6  V7 V8  V9 V10
## 1    1 Zpa63 NOT_ROOT NOT_MIG     TIP 305  NA NA  NA  NA
## 2    2  <NA> NOT_ROOT NOT_MIG NOT_TIP 305   4  1  32  11
## 3    3    V6 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 4    4  Za17 NOT_ROOT NOT_MIG     TIP   2  NA NA  NA  NA
## 5   15    V7 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 6   16  <NA> NOT_ROOT NOT_MIG NOT_TIP 172   3  1  15   1
## 7   31    V4 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 8   32  <NA> NOT_ROOT NOT_MIG NOT_TIP   2 172  3 104   8
## 9   51   V10 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 10  52  <NA> NOT_ROOT NOT_MIG NOT_TIP 104 304  2  76   2
## 11  75    V8 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 12  76  <NA> NOT_ROOT NOT_MIG NOT_TIP  52  51  1  75   1
## 13 103    V9 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 14 104  <NA> NOT_ROOT NOT_MIG NOT_TIP  32 256  4  52   4
## 15 135    V1 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 16 136  <NA> NOT_ROOT NOT_MIG NOT_TIP 212 135  1  31   1
## 17 171   V11 NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 18 172  <NA> NOT_ROOT NOT_MIG NOT_TIP  32  16  2 171   1
## 19 211    V3 NOT_ROOT NOT_MIG     TIP 212  NA NA  NA  NA
## 20 212  <NA> NOT_ROOT NOT_MIG NOT_TIP 256 136  2 211   1
## 21 255    V2 NOT_ROOT NOT_MIG     TIP 256  NA NA  NA  NA
## 22 256  <NA> NOT_ROOT NOT_MIG NOT_TIP 104 255  1 212   3
## 23 303    V5 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 24 304  <NA> NOT_ROOT NOT_MIG NOT_TIP  52 103  1 303   1
## 25 305  <NA>     ROOT NOT_MIG NOT_TIP 305   1  1   2  12
## 26 319  <NA> NOT_ROOT     MIG NOT_TIP  32 172 NA  NA  NA
## 27 365  <NA> NOT_ROOT     MIG NOT_TIP 136  31 NA  NA  NA
## 28 404  <NA> NOT_ROOT     MIG NOT_TIP 136  31 NA  NA  NA
## 29 481  <NA> NOT_ROOT     MIG NOT_TIP  32 172 NA  NA  NA
##                                                                                                                                                                                                                                                                                                             V11
## 1                                                                                                                                                                                                                                                                                             Zpa63:1.16484e-06
## 2                       (Za17:2.3297e-06,(((V6:0.00582307,V7:0.0253634):0.011378,V11:0.0234478):0.0103609,((V2:0,((V1:0.0425672,V4:0.0589969):0.0199511,V3:0.0589596):0.00319451):0.00116464,((V9:0.0448765,V5:0.00724181):0.0449264,(V10:0.0150421,V8:0.167223):0.0212643):0):0.0157095):0.181488):1.16484e-06
## 3                                                                                                                                                                                                                                                                                                 V6:0.00582307
## 4                                                                                                                                                                                                                                                                                               Za17:2.3297e-06
## 5                                                                                                                                                                                                                                                                                                  V7:0.0253634
## 6                                                                                                                                                                                                                                                                         (V6:0.00582307,V7:0.0253634):0.011378
## 7                                                                                                                                                                                                                                                                                                  V4:0.0589969
## 8                                                     (((V6:0.00582307,V7:0.0253634):0.011378,V11:0.0234478):0.0103609,((V2:0,((V1:0.0425672,V4:0.0589969):0.0199511,V3:0.0589596):0.00319451):0.00116464,((V9:0.0448765,V5:0.00724181):0.0449264,(V10:0.0150421,V8:0.167223):0.0212643):0):0.0157095):0.181488
## 9                                                                                                                                                                                                                                                                                                 V10:0.0150421
## 10                                                                                                                                                                                                                             ((V9:0.0448765,V5:0.00724181):0.0449264,(V10:0.0150421,V8:0.167223):0.0212643):0
## 11                                                                                                                                                                                                                                                                                                  V8:0.167223
## 12                                                                                                                                                                                                                                                                        (V10:0.0150421,V8:0.167223):0.0212643
## 13                                                                                                                                                                                                                                                                                                 V9:0.0448765
## 14                                                                                                                               ((V2:0,((V1:0.0425672,V4:0.0589969):0.0199511,V3:0.0589596):0.00319451):0.00116464,((V9:0.0448765,V5:0.00724181):0.0449264,(V10:0.0150421,V8:0.167223):0.0212643):0):0.0157095
## 15                                                                                                                                                                                                                                                                                                 V1:0.0425672
## 16                                                                                                                                                                                                                                                                        (V1:0.0425672,V4:0.0589969):0.0199511
## 17                                                                                                                                                                                                                                                                                                V11:0.0234478
## 18                                                                                                                                                                                                                                              ((V6:0.00582307,V7:0.0253634):0.011378,V11:0.0234478):0.0103609
## 19                                                                                                                                                                                                                                                                                                 V3:0.0589596
## 20                                                                                                                                                                                                                                              ((V1:0.0425672,V4:0.0589969):0.0199511,V3:0.0589596):0.00319451
## 21                                                                                                                                                                                                                                                                                                         V2:0
## 22                                                                                                                                                                                                                            (V2:0,((V1:0.0425672,V4:0.0589969):0.0199511,V3:0.0589596):0.00319451):0.00116464
## 23                                                                                                                                                                                                                                                                                                V5:0.00724181
## 24                                                                                                                                                                                                                                                                       (V9:0.0448765,V5:0.00724181):0.0449264
## 25 (Zpa63:1.16484e-06,(Za17:2.3297e-06,(((V6:0.00582307,V7:0.0253634):0.011378,V11:0.0234478):0.0103609,((V2:0,((V1:0.0425672,V4:0.0589969):0.0199511,V3:0.0589596):0.00319451):0.00116464,((V9:0.0448765,V5:0.00724181):0.0449264,(V10:0.0150421,V8:0.167223):0.0212643):0):0.0157095):0.181488):1.16484e-06);
## 26                                                                                                                                                                                                                                                                                                         <NA>
## 27                                                                                                                                                                                                                                                                                                         <NA>
## 28                                                                                                                                                                                                                                                                                                         <NA>
## 29                                                                                                                                                                                                                                                                                                         <NA>
##               x          y       ymin       ymax
## 1  1.164840e-06 0.96153846 0.92307692 1.00000000
## 2  1.164840e-06 0.84615385 0.00000000 0.92307692
## 3  2.090511e-01 0.80769231 0.76923077 0.84615385
## 4  3.494540e-06 0.88461538 0.84615385 0.92307692
## 5  2.285914e-01 0.73076923 0.69230769 0.76923077
## 6  2.032280e-01 0.76923077 0.69230769 0.84615385
## 7  2.805058e-01 0.42307692 0.38461538 0.46153846
## 8  1.814892e-01 0.61538462 0.00000000 0.84615385
## 9  2.335051e-01 0.11538462 0.07692308 0.15384615
## 10 1.971987e-01 0.15384615 0.00000000 0.30769231
## 11 2.696251e-01 0.03846154 0.00000000 0.07692308
## 12 2.184630e-01 0.07692308 0.00000000 0.15384615
## 13 2.471303e-01 0.26923077 0.23076923 0.30769231
## 14 1.971987e-01 0.30769231 0.00000000 0.61538462
## 15 2.640761e-01 0.50000000 0.46153846 0.53846154
## 16 2.215089e-01 0.46153846 0.38461538 0.53846154
## 17 2.152978e-01 0.65384615 0.61538462 0.69230769
## 18 1.918500e-01 0.69230769 0.61538462 0.84615385
## 19 2.414438e-01 0.34615385 0.30769231 0.38461538
## 20 2.015578e-01 0.38461538 0.30769231 0.53846154
## 21 1.983633e-01 0.57692308 0.53846154 0.61538462
## 22 1.983633e-01 0.53846154 0.30769231 0.61538462
## 23 2.205882e-01 0.19230769 0.15384615 0.23076923
## 24 2.133464e-01 0.23076923 0.15384615 0.30769231
## 25 0.000000e+00 0.92307692 0.00000000 1.00000000
## 26 1.896468e-01         NA 0.00000000 0.61538462
## 27 2.617726e-01         NA 0.38461538 0.46153846
## 28 2.805058e-01         NA 0.38461538 0.46153846
## 29 1.918500e-01         NA 0.00000000 0.61538462
## [1] "0.787347 0.18148916484 0.19185004524"
## [1] "0.682471 0.22150891484 0.28050581484"
## [1] "1 0.22150891484 0.28050581484"
## [1] "0.787476 0.18148916484 0.19185004524"

##  [1] 1.164840e-06 2.090511e-01 3.494540e-06 2.285914e-01 2.805058e-01
##  [6] 2.335051e-01 2.696251e-01 2.471303e-01 2.640761e-01 2.152978e-01
## [11] 2.414438e-01 1.983633e-01 2.205882e-01
## [1] 0.003
## [1] "mse 0.000965112171597633"
t = plot_tree(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.with_outgroups.m5.treemix.out"))
##     V1    V2       V3      V4      V5  V6  V7 V8  V9 V10
## 1    1 Zpa63 NOT_ROOT NOT_MIG     TIP 305  NA NA  NA  NA
## 2    2  <NA> NOT_ROOT NOT_MIG NOT_TIP 305   4  1  32  11
## 3    3    V6 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 4    4  Za17 NOT_ROOT NOT_MIG     TIP   2  NA NA  NA  NA
## 5   15    V7 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 6   16  <NA> NOT_ROOT NOT_MIG NOT_TIP 172   3  1  15   1
## 7   31    V4 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 8   32  <NA> NOT_ROOT NOT_MIG NOT_TIP   2 172  3 104   8
## 9   51   V10 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 10  52  <NA> NOT_ROOT NOT_MIG NOT_TIP 104  76  2 304   2
## 11  75    V8 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 12  76  <NA> NOT_ROOT NOT_MIG NOT_TIP  52  51  1  75   1
## 13 103    V9 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 14 104  <NA> NOT_ROOT NOT_MIG NOT_TIP  32 256  4  52   4
## 15 135    V1 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 16 136  <NA> NOT_ROOT NOT_MIG NOT_TIP 212 135  1  31   1
## 17 171   V11 NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 18 172  <NA> NOT_ROOT NOT_MIG NOT_TIP  32  16  2 171   1
## 19 211    V3 NOT_ROOT NOT_MIG     TIP 212  NA NA  NA  NA
## 20 212  <NA> NOT_ROOT NOT_MIG NOT_TIP 256 136  2 211   1
## 21 255    V2 NOT_ROOT NOT_MIG     TIP 256  NA NA  NA  NA
## 22 256  <NA> NOT_ROOT NOT_MIG NOT_TIP 104 255  1 212   3
## 23 303    V5 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 24 304  <NA> NOT_ROOT NOT_MIG NOT_TIP  52 303  1 103   1
## 25 305  <NA>     ROOT NOT_MIG NOT_TIP 305   1  1   2  12
## 26 321  <NA> NOT_ROOT     MIG NOT_TIP  32 172 NA  NA  NA
## 27 365  <NA> NOT_ROOT     MIG NOT_TIP 136  31 NA  NA  NA
## 28 404  <NA> NOT_ROOT     MIG NOT_TIP 136  31 NA  NA  NA
## 29 484  <NA> NOT_ROOT     MIG NOT_TIP  32 172 NA  NA  NA
## 30 533  <NA> NOT_ROOT     MIG NOT_TIP 212 211 NA  NA  NA
##                                                                                                                                                                                                                                                                                                            V11
## 1                                                                                                                                                                                                                                                                                            Zpa63:1.43583e-06
## 2                       (Za17:2.87164e-06,(((V6:0.00582361,V7:0.0253639):0.011378,V11:0.0234483):0.0103338,((V2:0,((V1:0.042426,V4:0.0589594):0.0213852,V3:0.068422):0.00197171):0.00325345,((V10:0.0154789,V8:0.160978):0.0646884,(V5:0.00726655,V9:0.0445142):0.0345319):0):0.0133917):0.181589):1.43583e-06
## 3                                                                                                                                                                                                                                                                                                V6:0.00582361
## 4                                                                                                                                                                                                                                                                                             Za17:2.87164e-06
## 5                                                                                                                                                                                                                                                                                                 V7:0.0253639
## 6                                                                                                                                                                                                                                                                        (V6:0.00582361,V7:0.0253639):0.011378
## 7                                                                                                                                                                                                                                                                                                 V4:0.0589594
## 8                                                      (((V6:0.00582361,V7:0.0253639):0.011378,V11:0.0234483):0.0103338,((V2:0,((V1:0.042426,V4:0.0589594):0.0213852,V3:0.068422):0.00197171):0.00325345,((V10:0.0154789,V8:0.160978):0.0646884,(V5:0.00726655,V9:0.0445142):0.0345319):0):0.0133917):0.181589
## 9                                                                                                                                                                                                                                                                                                V10:0.0154789
## 10                                                                                                                                                                                                                            ((V10:0.0154789,V8:0.160978):0.0646884,(V5:0.00726655,V9:0.0445142):0.0345319):0
## 11                                                                                                                                                                                                                                                                                                 V8:0.160978
## 12                                                                                                                                                                                                                                                                       (V10:0.0154789,V8:0.160978):0.0646884
## 13                                                                                                                                                                                                                                                                                                V9:0.0445142
## 14                                                                                                                                ((V2:0,((V1:0.042426,V4:0.0589594):0.0213852,V3:0.068422):0.00197171):0.00325345,((V10:0.0154789,V8:0.160978):0.0646884,(V5:0.00726655,V9:0.0445142):0.0345319):0):0.0133917
## 15                                                                                                                                                                                                                                                                                                 V1:0.042426
## 16                                                                                                                                                                                                                                                                        (V1:0.042426,V4:0.0589594):0.0213852
## 17                                                                                                                                                                                                                                                                                               V11:0.0234483
## 18                                                                                                                                                                                                                                             ((V6:0.00582361,V7:0.0253639):0.011378,V11:0.0234483):0.0103338
## 19                                                                                                                                                                                                                                                                                                 V3:0.068422
## 20                                                                                                                                                                                                                                               ((V1:0.042426,V4:0.0589594):0.0213852,V3:0.068422):0.00197171
## 21                                                                                                                                                                                                                                                                                                        V2:0
## 22                                                                                                                                                                                                                             (V2:0,((V1:0.042426,V4:0.0589594):0.0213852,V3:0.068422):0.00197171):0.00325345
## 23                                                                                                                                                                                                                                                                                               V5:0.00726655
## 24                                                                                                                                                                                                                                                                      (V5:0.00726655,V9:0.0445142):0.0345319
## 25 (Zpa63:1.43583e-06,(Za17:2.87164e-06,(((V6:0.00582361,V7:0.0253639):0.011378,V11:0.0234483):0.0103338,((V2:0,((V1:0.042426,V4:0.0589594):0.0213852,V3:0.068422):0.00197171):0.00325345,((V10:0.0154789,V8:0.160978):0.0646884,(V5:0.00726655,V9:0.0445142):0.0345319):0):0.0133917):0.181589):1.43583e-06);
## 26                                                                                                                                                                                                                                                                                                        <NA>
## 27                                                                                                                                                                                                                                                                                                        <NA>
## 28                                                                                                                                                                                                                                                                                                        <NA>
## 29                                                                                                                                                                                                                                                                                                        <NA>
## 30                                                                                                                                                                                                                                                                                                        <NA>
##               x          y       ymin       ymax
## 1  1.435830e-06 0.96153846 0.92307692 1.00000000
## 2  1.435830e-06 0.84615385 0.00000000 0.92307692
## 3  2.091258e-01 0.80769231 0.76923077 0.84615385
## 4  4.307470e-06 0.88461538 0.84615385 0.92307692
## 5  2.286661e-01 0.73076923 0.69230769 0.76923077
## 6  2.033022e-01 0.76923077 0.69230769 0.84615385
## 7  2.805520e-01 0.42307692 0.38461538 0.46153846
## 8  1.815904e-01 0.61538462 0.00000000 0.84615385
## 9  2.316277e-01 0.26923077 0.23076923 0.30769231
## 10 1.949821e-01 0.15384615 0.00000000 0.30769231
## 11 2.672453e-01 0.19230769 0.15384615 0.23076923
## 12 2.161488e-01 0.23076923 0.15384615 0.30769231
## 13 2.441714e-01 0.03846154 0.00000000 0.07692308
## 14 1.949821e-01 0.30769231 0.00000000 0.61538462
## 15 2.640185e-01 0.50000000 0.46153846 0.53846154
## 16 2.215925e-01 0.46153846 0.38461538 0.53846154
## 17 2.153725e-01 0.65384615 0.61538462 0.69230769
## 18 1.919242e-01 0.69230769 0.61538462 0.84615385
## 19 2.409830e-01 0.34615385 0.30769231 0.38461538
## 20 2.002073e-01 0.38461538 0.30769231 0.53846154
## 21 1.982356e-01 0.57692308 0.53846154 0.61538462
## 22 1.982356e-01 0.53846154 0.30769231 0.61538462
## 23 2.176475e-01 0.11538462 0.07692308 0.15384615
## 24 2.103809e-01 0.07692308 0.00000000 0.15384615
## 25 0.000000e+00 0.92307692 0.00000000 1.00000000
## 26 1.902816e-01         NA 0.00000000 0.61538462
## 27 2.646733e-01         NA 0.38461538 0.46153846
## 28 2.805520e-01         NA 0.38461538 0.46153846
## 29 1.919242e-01         NA 0.00000000 0.61538462
## 30 2.038858e-01         NA 0.30769231 0.38461538
## [1] "0.841047 0.18159043583 0.19192423583"
## [1] "0.730685 0.22159249583 0.28055198583"
## [1] "0.809339 0.22159249583 0.28055198583"
## [1] "0.841047 0.18159043583 0.19192423583"
## [1] "0.0902123 0.20020729583 0.240982972350924"

##  [1] 1.435830e-06 2.091258e-01 4.307470e-06 2.286661e-01 2.805520e-01
##  [6] 2.316277e-01 2.672453e-01 2.441714e-01 2.640185e-01 2.153725e-01
## [11] 2.409830e-01 1.982356e-01 2.176475e-01
## [1] 0.003
## [1] "mse 0.000965112171597633"
t = plot_tree(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.with_outgroups.m6.treemix.out"))
##     V1    V2       V3      V4      V5  V6  V7 V8  V9 V10
## 1    1 Zpa63 NOT_ROOT NOT_MIG     TIP 305  NA NA  NA  NA
## 2    2  <NA> NOT_ROOT NOT_MIG NOT_TIP 305  32 11   4   1
## 3    3    V6 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 4    4  Za17 NOT_ROOT NOT_MIG     TIP   2  NA NA  NA  NA
## 5   15    V7 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 6   16  <NA> NOT_ROOT NOT_MIG NOT_TIP 172   3  1  15   1
## 7   31    V4 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 8   32  <NA> NOT_ROOT NOT_MIG NOT_TIP   2 172  3 104   8
## 9   51   V10 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 10  52  <NA> NOT_ROOT     MIG NOT_TIP 104 304 NA  NA  NA
## 11  75    V8 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 12  76  <NA> NOT_ROOT NOT_MIG NOT_TIP 533  75  1  51   1
## 13 103    V9 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 14 104  <NA> NOT_ROOT NOT_MIG NOT_TIP  32 256  6 304   2
## 15 135    V1 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 16 136  <NA> NOT_ROOT NOT_MIG NOT_TIP 212 135  1  31   1
## 17 171   V11 NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 18 172  <NA> NOT_ROOT NOT_MIG NOT_TIP  32  16  2 171   1
## 19 211    V3 NOT_ROOT NOT_MIG     TIP 533  NA NA  NA  NA
## 20 212  <NA> NOT_ROOT NOT_MIG NOT_TIP 256 136  2 533   3
## 21 255    V2 NOT_ROOT NOT_MIG     TIP 256  NA NA  NA  NA
## 22 256  <NA> NOT_ROOT NOT_MIG NOT_TIP 104 255  1 212   5
## 23 303    V5 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 24 304  <NA> NOT_ROOT NOT_MIG NOT_TIP 104 303  1 103   1
## 25 305  <NA>     ROOT NOT_MIG NOT_TIP 305   1  1   2  12
## 26 321  <NA> NOT_ROOT     MIG NOT_TIP  32 172 NA  NA  NA
## 27 365  <NA> NOT_ROOT     MIG NOT_TIP 136  31 NA  NA  NA
## 28 404  <NA> NOT_ROOT     MIG NOT_TIP 136  31 NA  NA  NA
## 29 481  <NA> NOT_ROOT     MIG NOT_TIP  32 172 NA  NA  NA
## 30 533  <NA> NOT_ROOT NOT_MIG NOT_TIP 212  76  2 211   1
## 31 620  <NA> NOT_ROOT     MIG NOT_TIP 305   2 NA  NA  NA
##                                                                                                                                                                                                                                                                                                           V11
## 1                                                                                                                                                                                                                                                                                                     Zpa63:0
## 2             ((((V6:0.00582354,V7:0.0253638):0.011378,V11:0.0234483):0.00982902,((V2:0,((V1:0.0425007,V4:0.0589566):0.0222719,((V8:0.176954,V10:0.0147421):0.0435919,V3:0.071955):0.00424389):0.00103085):0.00318635,(V5:0.0072335,V9:0.0448782):0.0343367):0.0136708):0.181977,Za17:2.2394e-06):2.84783e-06
## 3                                                                                                                                                                                                                                                                                               V6:0.00582354
## 4                                                                                                                                                                                                                                                                                             Za17:2.2394e-06
## 5                                                                                                                                                                                                                                                                                                V7:0.0253638
## 6                                                                                                                                                                                                                                                                       (V6:0.00582354,V7:0.0253638):0.011378
## 7                                                                                                                                                                                                                                                                                                V4:0.0589566
## 8                                           (((V6:0.00582354,V7:0.0253638):0.011378,V11:0.0234483):0.00982902,((V2:0,((V1:0.0425007,V4:0.0589566):0.0222719,((V8:0.176954,V10:0.0147421):0.0435919,V3:0.071955):0.00424389):0.00103085):0.00318635,(V5:0.0072335,V9:0.0448782):0.0343367):0.0136708):0.181977
## 9                                                                                                                                                                                                                                                                                               V10:0.0147421
## 10                                                                                                                                                                                                                                                                                                       <NA>
## 11                                                                                                                                                                                                                                                                                                V8:0.176954
## 12                                                                                                                                                                                                                                                                      (V8:0.176954,V10:0.0147421):0.0435919
## 13                                                                                                                                                                                                                                                                                               V9:0.0448782
## 14                                                                                                                      ((V2:0,((V1:0.0425007,V4:0.0589566):0.0222719,((V8:0.176954,V10:0.0147421):0.0435919,V3:0.071955):0.00424389):0.00103085):0.00318635,(V5:0.0072335,V9:0.0448782):0.0343367):0.0136708
## 15                                                                                                                                                                                                                                                                                               V1:0.0425007
## 16                                                                                                                                                                                                                                                                      (V1:0.0425007,V4:0.0589566):0.0222719
## 17                                                                                                                                                                                                                                                                                              V11:0.0234483
## 18                                                                                                                                                                                                                                           ((V6:0.00582354,V7:0.0253638):0.011378,V11:0.0234483):0.00982902
## 19                                                                                                                                                                                                                                                                                                V3:0.071955
## 20                                                                                                                                                                                          ((V1:0.0425007,V4:0.0589566):0.0222719,((V8:0.176954,V10:0.0147421):0.0435919,V3:0.071955):0.00424389):0.00103085
## 21                                                                                                                                                                                                                                                                                                       V2:0
## 22                                                                                                                                                                        (V2:0,((V1:0.0425007,V4:0.0589566):0.0222719,((V8:0.176954,V10:0.0147421):0.0435919,V3:0.071955):0.00424389):0.00103085):0.00318635
## 23                                                                                                                                                                                                                                                                                               V5:0.0072335
## 24                                                                                                                                                                                                                                                                      (V5:0.0072335,V9:0.0448782):0.0343367
## 25 (Zpa63:0,((((V6:0.00582354,V7:0.0253638):0.011378,V11:0.0234483):0.00982902,((V2:0,((V1:0.0425007,V4:0.0589566):0.0222719,((V8:0.176954,V10:0.0147421):0.0435919,V3:0.071955):0.00424389):0.00103085):0.00318635,(V5:0.0072335,V9:0.0448782):0.0343367):0.0136708):0.181977,Za17:2.2394e-06):2.84783e-06);
## 26                                                                                                                                                                                                                                                                                                       <NA>
## 27                                                                                                                                                                                                                                                                                                       <NA>
## 28                                                                                                                                                                                                                                                                                                       <NA>
## 29                                                                                                                                                                                                                                                                                                       <NA>
## 30                                                                                                                                                                                                                                             ((V8:0.176954,V10:0.0147421):0.0435919,V3:0.071955):0.00424389
## 31                                                                                                                                                                                                                                                                                                       <NA>
##               x          y       ymin       ymax
## 1  0.000000e+00 0.96153846 0.92307692 1.00000000
## 2  2.847830e-06 0.07692308 0.00000000 0.92307692
## 3  2.090104e-01 0.88461538 0.84615385 0.92307692
## 4  5.087230e-06 0.03846154 0.00000000 0.07692308
## 5  2.285507e-01 0.80769231 0.76923077 0.84615385
## 6  2.031869e-01 0.84615385 0.76923077 0.92307692
## 7  2.810964e-01 0.50000000 0.46153846 0.53846154
## 8  1.819798e-01 0.69230769 0.07692308 0.92307692
## 9  2.409417e-01 0.34615385 0.30769231 0.38461538
## 10 1.966620e-01         NA 0.07692308 0.23076923
## 11 2.776415e-01 0.42307692 0.38461538 0.46153846
## 12 2.266214e-01 0.38461538 0.30769231 0.46153846
## 13 2.447446e-01 0.11538462 0.07692308 0.15384615
## 14 1.956506e-01 0.23076923 0.07692308 0.69230769
## 15 2.646404e-01 0.57692308 0.53846154 0.61538462
## 16 2.221397e-01 0.53846154 0.46153846 0.61538462
## 17 2.152572e-01 0.73076923 0.69230769 0.76923077
## 18 1.918089e-01 0.76923077 0.69230769 0.92307692
## 19 2.427693e-01 0.26923077 0.23076923 0.30769231
## 20 1.998678e-01 0.46153846 0.23076923 0.61538462
## 21 1.988370e-01 0.65384615 0.61538462 0.69230769
## 22 1.988370e-01 0.61538462 0.23076923 0.69230769
## 23 2.181631e-01 0.19230769 0.15384615 0.23076923
## 24 2.109296e-01 0.15384615 0.07692308 0.23076923
## 25 0.000000e+00 0.92307692 0.00000000 1.00000000
## 26 1.896387e-01         NA 0.07692308 0.69230769
## 27 2.617896e-01         NA 0.46153846 0.53846154
## 28 2.810964e-01         NA 0.46153846 0.53846154
## 29 1.918089e-01         NA 0.07692308 0.69230769
## 30 2.041117e-01 0.30769231 0.23076923 0.46153846
## 31 0.000000e+00         NA 0.00000000 0.92307692
## [1] "0.0661926 0.19565064783 0.210929568478836"
## [1] "0.779213 0.18197984783 0.19180886783"
## [1] "0.672527 0.22213974783 0.28109639383"
## [1] "0.684004 0.22213974783 0.28109639383"
## [1] "0.779213 0.18197984783 0.19180886783"
## [1] "0 0 2.84783e-06"

##  [1] 0.000000e+00 2.090104e-01 5.087230e-06 2.285507e-01 2.810964e-01
##  [6] 2.409417e-01 2.776415e-01 2.447446e-01 2.646404e-01 2.152572e-01
## [11] 2.427693e-01 1.988370e-01 2.181631e-01
## [1] 0.003
## [1] "mse 0.000965112171597633"
t = plot_tree(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.with_outgroups.m7.treemix.out"))
##     V1    V2       V3      V4      V5  V6  V7 V8  V9 V10
## 1    1 Zpa63 NOT_ROOT NOT_MIG     TIP 305  NA NA  NA  NA
## 2    2  <NA> NOT_ROOT NOT_MIG NOT_TIP 305   4  1  32  11
## 3    3    V6 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 4    4  Za17 NOT_ROOT NOT_MIG     TIP   2  NA NA  NA  NA
## 5   15    V7 NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 6   16  <NA> NOT_ROOT NOT_MIG NOT_TIP 172   3  1  15   1
## 7   31    V4 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 8   32  <NA> NOT_ROOT NOT_MIG NOT_TIP   2 172  3 104   8
## 9   51   V10 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 10  52  <NA> NOT_ROOT     MIG NOT_TIP 104 304 NA  NA  NA
## 11  75    V8 NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 12  76  <NA> NOT_ROOT NOT_MIG NOT_TIP 533  51  1  75   1
## 13 103    V9 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 14 104  <NA> NOT_ROOT NOT_MIG NOT_TIP  32 256  6 304   2
## 15 135    V1 NOT_ROOT NOT_MIG     TIP 136  NA NA  NA  NA
## 16 136  <NA> NOT_ROOT NOT_MIG NOT_TIP 212  31  1 135   1
## 17 171   V11 NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 18 172  <NA> NOT_ROOT NOT_MIG NOT_TIP  32 171  1  16   2
## 19 211    V3 NOT_ROOT NOT_MIG     TIP 533  NA NA  NA  NA
## 20 212  <NA> NOT_ROOT NOT_MIG NOT_TIP 256 136  2 533   3
## 21 255    V2 NOT_ROOT NOT_MIG     TIP 256  NA NA  NA  NA
## 22 256  <NA> NOT_ROOT NOT_MIG NOT_TIP 104 255  1 212   5
## 23 303    V5 NOT_ROOT NOT_MIG     TIP 304  NA NA  NA  NA
## 24 304  <NA> NOT_ROOT NOT_MIG NOT_TIP 104 303  1 103   1
## 25 305  <NA>     ROOT NOT_MIG NOT_TIP 305   1  1   2  12
## 26 321  <NA> NOT_ROOT     MIG NOT_TIP  32 172 NA  NA  NA
## 27 365  <NA> NOT_ROOT     MIG NOT_TIP 136  31 NA  NA  NA
## 28 404  <NA> NOT_ROOT     MIG NOT_TIP 136  31 NA  NA  NA
## 29 481  <NA> NOT_ROOT     MIG NOT_TIP  32 172 NA  NA  NA
## 30 533  <NA> NOT_ROOT NOT_MIG NOT_TIP 212  76  2 211   1
## 31 702  <NA> NOT_ROOT     MIG NOT_TIP 304 103 NA  NA  NA
## 32 620  <NA> NOT_ROOT     MIG NOT_TIP   2  32 NA  NA  NA
##                                                                                                                                                                                                                                                                                                              V11
## 1                                                                                                                                                                                                                                                                                              Zpa63:8.71101e-07
## 2                       (Za17:0,((V11:0.0258657,(V6:0.0058236,V7:0.0253639):0.0106469):0.0103785,((V2:0,((V4:0.0589702,V1:0.0425645):0.0223678,((V10:0.0151966,V8:0.181694):0.0334395,V3:0.0738101):0.00378898):0.000927666):0.00380613,(V5:0.00680393,V9:0.0466579):0.0318388):0.0131475):0.181864):8.71101e-07
## 3                                                                                                                                                                                                                                                                                                   V6:0.0058236
## 4                                                                                                                                                                                                                                                                                                         Za17:0
## 5                                                                                                                                                                                                                                                                                                   V7:0.0253639
## 6                                                                                                                                                                                                                                                                          (V6:0.0058236,V7:0.0253639):0.0106469
## 7                                                                                                                                                                                                                                                                                                   V4:0.0589702
## 8                                            ((V11:0.0258657,(V6:0.0058236,V7:0.0253639):0.0106469):0.0103785,((V2:0,((V4:0.0589702,V1:0.0425645):0.0223678,((V10:0.0151966,V8:0.181694):0.0334395,V3:0.0738101):0.00378898):0.000927666):0.00380613,(V5:0.00680393,V9:0.0466579):0.0318388):0.0131475):0.181864
## 9                                                                                                                                                                                                                                                                                                  V10:0.0151966
## 10                                                                                                                                                                                                                                                                                                          <NA>
## 11                                                                                                                                                                                                                                                                                                   V8:0.181694
## 12                                                                                                                                                                                                                                                                         (V10:0.0151966,V8:0.181694):0.0334395
## 13                                                                                                                                                                                                                                                                                                  V9:0.0466579
## 14                                                                                                                      ((V2:0,((V4:0.0589702,V1:0.0425645):0.0223678,((V10:0.0151966,V8:0.181694):0.0334395,V3:0.0738101):0.00378898):0.000927666):0.00380613,(V5:0.00680393,V9:0.0466579):0.0318388):0.0131475
## 15                                                                                                                                                                                                                                                                                                  V1:0.0425645
## 16                                                                                                                                                                                                                                                                         (V4:0.0589702,V1:0.0425645):0.0223678
## 17                                                                                                                                                                                                                                                                                                 V11:0.0258657
## 18                                                                                                                                                                                                                                               (V11:0.0258657,(V6:0.0058236,V7:0.0253639):0.0106469):0.0103785
## 19                                                                                                                                                                                                                                                                                                  V3:0.0738101
## 20                                                                                                                                                                                           ((V4:0.0589702,V1:0.0425645):0.0223678,((V10:0.0151966,V8:0.181694):0.0334395,V3:0.0738101):0.00378898):0.000927666
## 21                                                                                                                                                                                                                                                                                                          V2:0
## 22                                                                                                                                                                         (V2:0,((V4:0.0589702,V1:0.0425645):0.0223678,((V10:0.0151966,V8:0.181694):0.0334395,V3:0.0738101):0.00378898):0.000927666):0.00380613
## 23                                                                                                                                                                                                                                                                                                 V5:0.00680393
## 24                                                                                                                                                                                                                                                                        (V5:0.00680393,V9:0.0466579):0.0318388
## 25 (Zpa63:8.71101e-07,(Za17:0,((V11:0.0258657,(V6:0.0058236,V7:0.0253639):0.0106469):0.0103785,((V2:0,((V4:0.0589702,V1:0.0425645):0.0223678,((V10:0.0151966,V8:0.181694):0.0334395,V3:0.0738101):0.00378898):0.000927666):0.00380613,(V5:0.00680393,V9:0.0466579):0.0318388):0.0131475):0.181864):8.71101e-07);
## 26                                                                                                                                                                                                                                                                                                          <NA>
## 27                                                                                                                                                                                                                                                                                                          <NA>
## 28                                                                                                                                                                                                                                                                                                          <NA>
## 29                                                                                                                                                                                                                                                                                                          <NA>
## 30                                                                                                                                                                                                                                               ((V10:0.0151966,V8:0.181694):0.0334395,V3:0.0738101):0.00378898
## 31                                                                                                                                                                                                                                                                                                          <NA>
## 32                                                                                                                                                                                                                                                                                                          <NA>
##               x          y       ymin       ymax
## 1  8.711010e-07 0.96153846 0.92307692 1.00000000
## 2  8.711010e-07 0.84615385 0.00000000 0.92307692
## 3  2.087135e-01 0.73076923 0.69230769 0.76923077
## 4  8.711010e-07 0.88461538 0.84615385 0.92307692
## 5  2.282538e-01 0.65384615 0.61538462 0.69230769
## 6  2.028899e-01 0.69230769 0.61538462 0.76923077
## 7  2.810838e-01 0.50000000 0.46153846 0.53846154
## 8  1.818645e-01 0.61538462 0.00000000 0.84615385
## 9  2.407640e-01 0.34615385 0.30769231 0.38461538
## 10 1.977550e-01         NA 0.00000000 0.15384615
## 11 2.773615e-01 0.26923077 0.23076923 0.30769231
## 12 2.264316e-01 0.30769231 0.23076923 0.38461538
## 13 2.446166e-01 0.03846154 0.00000000 0.07692308
## 14 1.950120e-01 0.15384615 0.00000000 0.61538462
## 15 2.646781e-01 0.42307692 0.38461538 0.46153846
## 16 2.221136e-01 0.46153846 0.38461538 0.53846154
## 17 2.163304e-01 0.80769231 0.76923077 0.84615385
## 18 1.922430e-01 0.76923077 0.61538462 0.84615385
## 19 2.424522e-01 0.19230769 0.15384615 0.23076923
## 20 1.997458e-01 0.38461538 0.15384615 0.53846154
## 21 1.988181e-01 0.57692308 0.53846154 0.61538462
## 22 1.988181e-01 0.53846154 0.15384615 0.61538462
## 23 2.173447e-01 0.11538462 0.07692308 0.15384615
## 24 2.105408e-01 0.07692308 0.00000000 0.15384615
## 25 0.000000e+00 0.92307692 0.00000000 1.00000000
## 26 1.894269e-01         NA 0.00000000 0.61538462
## 27 2.566255e-01         NA 0.38461538 0.46153846
## 28 2.810838e-01         NA 0.38461538 0.46153846
## 29 1.922430e-01         NA 0.00000000 0.61538462
## 30 2.035347e-01 0.23076923 0.15384615 0.38461538
## 31 2.446166e-01         NA 0.00000000 0.07692308
## 32 8.932167e-02         NA 0.00000000 0.84615385
## [1] "0.176644 0.195011971101 0.210540791704741"
## [1] "0.728667 0.181864471101 0.192242951101"
## [1] "0.585243 0.222113567101 0.281083837101"
## [1] "0.662929 0.222113567101 0.281083837101"
## [1] "0.728667 0.181864471101 0.192242951101"
## [1] "1 0.210540791704741 0.244616577776658"
## [1] "0.491142 8.71101e-07 0.181864471101"

##  [1] 8.711010e-07 2.087135e-01 8.711010e-07 2.282538e-01 2.810838e-01
##  [6] 2.407640e-01 2.773615e-01 2.446166e-01 2.646781e-01 2.163304e-01
## [11] 2.424522e-01 1.988181e-01 2.173447e-01
## [1] 0.003
## [1] "mse 0.000965112171597633"
mig_events = c(0:6)
temp = tibble(mig_events = mig_events, 
              var_explained = sapply(paste0(PopStr_dir, "High_anc_coef_only_outgroups_positions.with_outgroups.m", 
                                            mig_events, ".treemix.out"), get_f))

ggplot(temp, aes(mig_events, var_explained)) + 
  geom_line() + 
  geom_point() + 
  geom_hline(yintercept = 0.995, color = "#cbf3f0", linetype = "dashed", size = 1) +
  theme_bw() +
  labs(x = "Number of infered migration events", y = "Proportion of explained variance",
       title = str_wrap(paste0("Variance explained by the model of Treemix with",
                               " different number of migration events"), 70))



Diversity, LD and divergence between clusters


Diversity per clusters

The genetic diversity was estimated per window of 10kb over 10 random draw of samples.

#rsync -avP  alice@130.125.25.244:/data2/alice/WW_project/3_Sumstats_demography/Sample_list_V*_*_diversity_pi.tsv \
#    ~/Documents/Postdoc_Bruce/Projects/WW_project/3_Sumstats_demography/

echo -e "Subset_samples\tChromosome\tStart\tStop\tPi\tTajimaD\tTheta" > \
  ~/Documents/Postdoc_Bruce/Projects/WW_project/3_Sumstats_demography/Diversity_per_cluster.tsv
cat ~/Documents/Postdoc_Bruce/Projects/WW_project/3_Sumstats_demography/Sample_list_V*_*_diversity_pi.tsv >>  \
  ~/Documents/Postdoc_Bruce/Projects/WW_project/3_Sumstats_demography/Diversity_per_cluster.tsv

Let’s visualize the difference of genetic diversity per cluster.

diversity_values = read_tsv(paste0(Sumstats_dir, "Diversity_per_cluster.tsv"), na = "nan") %>%
   dplyr::select(-Theta, -TajimaD)

diversity_values %>%
  dplyr::mutate(Subset_samples = str_remove(Subset_samples, "Sample_list_")) %>%
  separate(Subset_samples, into = c("Cluster", "Rep")) %>%
  group_by(Cluster) %>%
  dplyr::summarize(Pi = median(Pi, na.rm = T)) %>%
  ggplot() + 
  geom_point(aes(x = Cluster, y = Pi, color = Cluster), size = 4) +
  geom_segment(aes(x = Cluster, xend = Cluster, y = 0, yend = Pi, color = Cluster), size = 1) +
  theme_bw()+
  theme(axis.title.x = element_blank(), axis.text.x = element_text(angle = 40, hjust = 1, vjust = 1)) +
  Color_Cluster

temp = diversity_values %>% 
  mutate(Subset_samples = str_remove(Subset_samples, "Sample_list_")) %>%
  separate(Subset_samples, into = c("Cluster", "Rep")) %>%
  group_by(Cluster, Chromosome, Start, Stop) %>%
  #group_by(Cluster, Rep) %>%
  dplyr::summarize(Pi = mean(Pi, na.rm = T))



pi_data = temp %>%
  filter(!is.na(Cluster)) %>%
  group_by(Cluster) %>%
  dplyr::mutate(cluster_avg = median(Pi, na.rm = T))


#One-way ANOVA with blocks
##Define linear model

model = lm(Pi ~ Cluster,
          data=pi_data)
summary(model)   ### Will show overall p-value and r-squared
## 
## Call:
## lm(formula = Pi ~ Cluster, data = pi_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.014185 -0.005772 -0.001738  0.004208  0.066385 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0057819  0.0001419  40.739  < 2e-16 ***
## ClusterV10   0.0050513  0.0002007  25.167  < 2e-16 ***
## ClusterV11   0.0080221  0.0002007  39.969  < 2e-16 ***
## ClusterV2    0.0075684  0.0002007  37.708  < 2e-16 ***
## ClusterV3    0.0031074  0.0002007  15.482  < 2e-16 ***
## ClusterV4   -0.0013933  0.0002007  -6.942 3.93e-12 ***
## ClusterV5    0.0061510  0.0002007  30.646  < 2e-16 ***
## ClusterV6    0.0084033  0.0002007  41.868  < 2e-16 ***
## ClusterV7    0.0060794  0.0002007  30.289  < 2e-16 ***
## ClusterV8    0.0031009  0.0002007  15.449  < 2e-16 ***
## ClusterV9    0.0023513  0.0002007  11.715  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.008584 on 40227 degrees of freedom
## Multiple R-squared:  0.1157, Adjusted R-squared:  0.1154 
## F-statistic: 526.1 on 10 and 40227 DF,  p-value: < 2.2e-16
##Conduct analysis of variance
Anova(model,type = "II")  
## Anova Table (Type II tests)
## 
## Response: Pi
##            Sum Sq    Df F value    Pr(>F)    
## Cluster   0.38765    10  526.12 < 2.2e-16 ***
## Residuals 2.96396 40227                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model)
## 
## Call:
## lm(formula = Pi ~ Cluster, data = pi_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.014185 -0.005772 -0.001738  0.004208  0.066385 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0057819  0.0001419  40.739  < 2e-16 ***
## ClusterV10   0.0050513  0.0002007  25.167  < 2e-16 ***
## ClusterV11   0.0080221  0.0002007  39.969  < 2e-16 ***
## ClusterV2    0.0075684  0.0002007  37.708  < 2e-16 ***
## ClusterV3    0.0031074  0.0002007  15.482  < 2e-16 ***
## ClusterV4   -0.0013933  0.0002007  -6.942 3.93e-12 ***
## ClusterV5    0.0061510  0.0002007  30.646  < 2e-16 ***
## ClusterV6    0.0084033  0.0002007  41.868  < 2e-16 ***
## ClusterV7    0.0060794  0.0002007  30.289  < 2e-16 ***
## ClusterV8    0.0031009  0.0002007  15.449  < 2e-16 ***
## ClusterV9    0.0023513  0.0002007  11.715  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.008584 on 40227 degrees of freedom
## Multiple R-squared:  0.1157, Adjusted R-squared:  0.1154 
## F-statistic: 526.1 on 10 and 40227 DF,  p-value: < 2.2e-16
hist(residuals(model), col="darkgray")

#Post-hoc analysis:  mean separation tests
marginal = lsmeans(model, ~ Cluster)

pairs(marginal, adjust="tukey")
##  contrast   estimate       SE    df t.ratio p.value
##  V1 - V10  -5.05e-03 0.000201 40227 -25.167  <.0001
##  V1 - V11  -8.02e-03 0.000201 40227 -39.969  <.0001
##  V1 - V2   -7.57e-03 0.000201 40227 -37.708  <.0001
##  V1 - V3   -3.11e-03 0.000201 40227 -15.482  <.0001
##  V1 - V4    1.39e-03 0.000201 40227   6.942  <.0001
##  V1 - V5   -6.15e-03 0.000201 40227 -30.646  <.0001
##  V1 - V6   -8.40e-03 0.000201 40227 -41.868  <.0001
##  V1 - V7   -6.08e-03 0.000201 40227 -30.289  <.0001
##  V1 - V8   -3.10e-03 0.000201 40227 -15.449  <.0001
##  V1 - V9   -2.35e-03 0.000201 40227 -11.715  <.0001
##  V10 - V11 -2.97e-03 0.000201 40227 -14.802  <.0001
##  V10 - V2  -2.52e-03 0.000201 40227 -12.541  <.0001
##  V10 - V3   1.94e-03 0.000201 40227   9.685  <.0001
##  V10 - V4   6.44e-03 0.000201 40227  32.109  <.0001
##  V10 - V5  -1.10e-03 0.000201 40227  -5.479  <.0001
##  V10 - V6  -3.35e-03 0.000201 40227 -16.701  <.0001
##  V10 - V7  -1.03e-03 0.000201 40227  -5.122  <.0001
##  V10 - V8   1.95e-03 0.000201 40227   9.718  <.0001
##  V10 - V9   2.70e-03 0.000201 40227  13.452  <.0001
##  V11 - V2   4.54e-04 0.000201 40227   2.261  0.4614
##  V11 - V3   4.91e-03 0.000201 40227  24.486  <.0001
##  V11 - V4   9.42e-03 0.000201 40227  46.911  <.0001
##  V11 - V5   1.87e-03 0.000201 40227   9.322  <.0001
##  V11 - V6  -3.81e-04 0.000201 40227  -1.899  0.7183
##  V11 - V7   1.94e-03 0.000201 40227   9.679  <.0001
##  V11 - V8   4.92e-03 0.000201 40227  24.519  <.0001
##  V11 - V9   5.67e-03 0.000201 40227  28.254  <.0001
##  V2 - V3    4.46e-03 0.000201 40227  22.226  <.0001
##  V2 - V4    8.96e-03 0.000201 40227  44.650  <.0001
##  V2 - V5    1.42e-03 0.000201 40227   7.062  <.0001
##  V2 - V6   -8.35e-04 0.000201 40227  -4.159  0.0016
##  V2 - V7    1.49e-03 0.000201 40227   7.419  <.0001
##  V2 - V8    4.47e-03 0.000201 40227  22.259  <.0001
##  V2 - V9    5.22e-03 0.000201 40227  25.993  <.0001
##  V3 - V4    4.50e-03 0.000201 40227  22.424  <.0001
##  V3 - V5   -3.04e-03 0.000201 40227 -15.164  <.0001
##  V3 - V6   -5.30e-03 0.000201 40227 -26.385  <.0001
##  V3 - V7   -2.97e-03 0.000201 40227 -14.807  <.0001
##  V3 - V8    6.58e-06 0.000201 40227   0.033  1.0000
##  V3 - V9    7.56e-04 0.000201 40227   3.767  0.0077
##  V4 - V5   -7.54e-03 0.000201 40227 -37.588  <.0001
##  V4 - V6   -9.80e-03 0.000201 40227 -48.809  <.0001
##  V4 - V7   -7.47e-03 0.000201 40227 -37.231  <.0001
##  V4 - V8   -4.49e-03 0.000201 40227 -22.391  <.0001
##  V4 - V9   -3.74e-03 0.000201 40227 -18.657  <.0001
##  V5 - V6   -2.25e-03 0.000201 40227 -11.221  <.0001
##  V5 - V7    7.16e-05 0.000201 40227   0.357  1.0000
##  V5 - V8    3.05e-03 0.000201 40227  15.197  <.0001
##  V5 - V9    3.80e-03 0.000201 40227  18.931  <.0001
##  V6 - V7    2.32e-03 0.000201 40227  11.578  <.0001
##  V6 - V8    5.30e-03 0.000201 40227  26.418  <.0001
##  V6 - V9    6.05e-03 0.000201 40227  30.153  <.0001
##  V7 - V8    2.98e-03 0.000201 40227  14.840  <.0001
##  V7 - V9    3.73e-03 0.000201 40227  18.574  <.0001
##  V8 - V9    7.50e-04 0.000201 40227   3.734  0.0087
## 
## P value adjustment: tukey method for comparing a family of 11 estimates
CLD = cld(marginal,
          alpha   = 0.05,
          Letters = letters,  ### Use lower-case letters for .group
          adjust  = "tukey")  ### Tukey-adjusted p-values

CLD
##  Cluster  lsmean       SE    df lower.CL upper.CL .group   
##  V4      0.00439 0.000142 40227  0.00399  0.00479  a       
##  V1      0.00578 0.000142 40227  0.00538  0.00618   b      
##  V9      0.00813 0.000142 40227  0.00773  0.00853    c     
##  V8      0.00888 0.000142 40227  0.00848  0.00928     d    
##  V3      0.00889 0.000142 40227  0.00849  0.00929     d    
##  V10     0.01083 0.000142 40227  0.01043  0.01123      e   
##  V7      0.01186 0.000142 40227  0.01146  0.01226       f  
##  V5      0.01193 0.000142 40227  0.01153  0.01233       f  
##  V2      0.01335 0.000142 40227  0.01295  0.01375        g 
##  V11     0.01380 0.000142 40227  0.01340  0.01421        gh
##  V6      0.01419 0.000142 40227  0.01378  0.01459         h
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 11 estimates 
## P value adjustment: tukey method for comparing a family of 11 estimates 
## significance level used: alpha = 0.05 
## NOTE: Compact letter displays can be misleading
##       because they show NON-findings rather than findings.
##       Consider using 'pairs()', 'pwpp()', or 'pwpm()' instead.
CLD$.group=gsub(" ", "", CLD$.group)

### Plot
ggplot(pi_data, aes(x = reorder(Cluster, -Pi), y = Pi))  +
  coord_flip() +
  geom_boxplot(outlier.shape = NA, alpha = .4, color = "grey")   +
  geom_point(aes(color = Cluster, x = Cluster, y = cluster_avg), size = 3)+
  theme_cowplot() +
    theme(legend.position = "none",
          axis.text.x = element_text(angle = 40, hjust = 1)) +
  labs (x = "", y = "Diversity (pi)") +
  ylim(c(-0.001, 0.05)) +
  geom_text(data = CLD, aes(x = Cluster, label = .group, y = 0.049), color   = "black") +
  Color_Cluster

Linkage disequilibrium decay

There was a clear effect of the number of samples on the LD estimated, so I switched to subsets.

# Running the LD stats for 10 subsets per cluster
for i in {1..11} ; 
do 
  for repet in {1..2} ; 
  do 
  echo $i $repet ; 
  sbatch -p normal.168h LD_decay.sh ./Directories_new.sh \
      /data2/alice/WW_project/2_Population_structure/0_Nuclear_genome/Sample_list_V${i}_${repet}.args \
      V${i}_${repet} ./temp.windows ; 
  done ; 
done

# Gathering all tables into one
echo -e "Pop\tRepeat\tStart\tStop\tMean\tMedian\tSstdev" \
   > /data2/alice/WW_project/2_Population_structure/0_Nuclear_genome/LD_per_cluster.tab ; 
for i in {1..11} ; 
do 
  for repet in {1..10} ; 
  do 
   awk -v pop=$i -v repet=$repet 'BEGIN {OFS = "\t"} {if ( $1 ~ /[0-9]/ ) {print pop, repet, $1, $2, $3, $4, $5 } }'  \
       /data2/alice/WW_project/2_Population_structure/0_Nuclear_genome/LD_V${i}_${repet}.tab \
       >> /data2/alice/WW_project/2_Population_structure/0_Nuclear_genome/LD_per_cluster.tab; 
  done
done

The different dots are the values estimated on different subsets of samples (for some, the subsets are the size of the cluster so the variance is null). The line here is the median of the subset estimates ()

LD_data = read_delim(paste0(nuc_PS_dir, "LD_per_cluster.tab"),
           delim = "\t") %>%
  mutate(center = (Start + Stop) / 2) %>%
  mutate(Cluster = paste0("V", Pop))

temp = LD_data %>% group_by(Cluster, center) %>%
  dplyr::summarize(Median = mean(Median))
ggplot() +
  geom_point(data = LD_data, 
             aes(x = center, y = Median, color = Cluster, shape = Cluster),
             alpha = .2) +
  geom_line(data = temp, aes(x = center, y = Median, color = Cluster, shape = Cluster)) +
  geom_hline(yintercept = 0.2, linetype = "dashed")+
  Color_Cluster + Shape_Cluster + 
  theme_bw()

ggplot() +
  geom_point(data = LD_data, 
             aes(x = center, y = Median, color = Cluster, shape = Cluster),
             alpha = .2) +
  geom_line(data = temp, aes(x = center, y = Median, color = Cluster, shape = Cluster)) +
  geom_hline(yintercept = 0.2, linetype = "dashed") + 
  Color_Cluster2 + Shape_Cluster + 
  theme_bw() +
  scale_y_log10()

temp %>% 
  filter(Median >= 0.2) %>%
  group_by(Cluster) %>%
  dplyr::summarize(Center = max(center)) %>%
ggplot() +
  geom_point(aes(x = Cluster, y = Center, color = Cluster), size = 4) +
  geom_segment(aes(x = Cluster, xend = Cluster, y = 0, yend = Center, color = Cluster), size = 1) +
  Color_Cluster + 
  theme_bw() +
  scale_y_log10() +
  labs(y = "Distance between SNPs with a Rsquared <= 0.2",
       title = "LD decay, summary statistic") +
  coord_flip()

Divergence

I estimate the Fst between clusters and represent the results as a matrix with colors indicating the Fst values. The order of the clusters is related to their place in the tree from treemix (without any migration event).

for i in  {1..11} ; do for j in {1..11} ; do  python Divergence_with_scikit-allel_Fst.py --vcf_file ../1_Variant_calling/4_Joint_calling/Ztritici_global_March2021.genotyped.ALL.filtered.clean.AB_filtered.variants.good_samples.biall_SNP.max-m-1.maf-0.05.thin-1000bp.recode.vcf  --sample_list1 ../2_Population_structure/Sample_list_V${i}.args --sample_list2 ../2_Population_structure/Sample_list_V${j}.args --out_dir ../2_Population_structure/Divergence/ --no_header ; done ; done

 echo -e "Subset1\tSubset2\tHudsons_Fst\tWeir_Cockerham_Fst" > \
  ../2_Population_structure/Divergence/Divergence.Fst_between_clusters.tsv
 cat ../2_Population_structure/Divergence/Divergence.Fst.Sample_list_V*_vs_Sample_list_V*.tsv >>  \
  ../2_Population_structure/Divergence/Divergence.Fst_between_clusters.tsv
  
  
ordering_table = tibble(Clusters = c("V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11"),
                        Order_tree = c(7,9,8,6,10,2,1,5,11,4,3))

Fst_values = read_tsv(paste0(PopStr_dir, "Divergence/Divergence.Fst_between_clusters.tsv"))
temp = Fst_values %>% dplyr::select(-Hudsons_Fst) %>%
  mutate(Weir_Cockerham_Fst = ifelse(Weir_Cockerham_Fst < 0, 0, Weir_Cockerham_Fst),
         Subset1 = str_remove(Subset1, "Sample_list_"),
         Subset2 = str_remove(Subset2, "Sample_list_")) %>%
  left_join(., ordering_table, by = c("Subset1" = "Clusters")) %>%
  left_join(., ordering_table, by = c("Subset2" = "Clusters")) %>%
  arrange(Order_tree.x, Order_tree.y) %>%
  dplyr::select(-Order_tree.x, -Order_tree.y) %>%
  pivot_wider(names_from = Subset2, values_from = Weir_Cockerham_Fst)
WC_Fst_values <- as.matrix(temp[,-1])
rownames(WC_Fst_values) <- temp[,1] %>% pull()

corrplot(WC_Fst_values, method="color", is.corr=FALSE, 
         tl.col = "black", tl.srt=45, tl.cex = 1, 
         addCoef.col = "black", col=colorRampPalette(c("white","#2ec4b6"))(200))



Isolation by distance

I am curious about isolation by distance in the different continents.

#Estimate geographic distances between samples
temp = dplyr::select(Zt_meta, ID_file, Latitude, Longitude) %>%
  filter(!is.na(Latitude))
geo_distances = as.data.frame(geodist(x = temp, sequential = FALSE, measure = "geodesic")) 
colnames(geo_distances) <- temp$ID_file
geo_distances$ID_file1 = temp$ID_file
geo_distances = as.tibble(geo_distances) %>%
  pivot_longer(-ID_file1, names_to = "ID_file2", values_to = "Geo_distance")
  

#Run IBS with plink on cluster and import
# ${SOFTPATH}plink   --distance square ibs   --vcf ${vcf_dir}${VCFBasename}.genotyped.ALL.filtered.clean.AB_filtered.variants.good_samples.max-m-80.vcf.gz   --out ${vcf_dir}${VCFBasename}.genotyped.ALL.filtered.clean.AB_filtered.variants.good_samples.max-m-80   --const-fid   --not-chr 14-21 mt
#rsync -avP alice@130.125.25.244:/data2/alice/WW_project/1_Variant_calling/4_Joint_calling/Ztritici_global_March2021.genotyped.ALL.filtered.clean.AB_filtered.variants.good_samples.max-m-80.mibs.id ~/Documents/Postdoc_Bruce/Projects/WW_project/2_Population_structure/0_Nuclear_genome/All_good_samples.mibs.id
#rsync -avP alice@130.125.25.244:/data2/alice/WW_project/1_Variant_calling/4_Joint_calling/Ztritici_global_March2021.genotyped.ALL.filtered.clean.AB_filtered.variants.good_samples.max-m-80.mibs ~/Documents/Postdoc_Bruce/Projects/WW_project/2_Population_structure/0_Nuclear_genome/All_good_samples.mibs

#../Software/vcftools_jydu/src/cpp/vcftools --gzvcf ${vcf_dir}${VCFBasename}.genotyped.ALL.filtered.clean.AB_filtered.variants.good_samples.max-m-80.vcf.gz  --missing-indv  --out temp
#rsync -avP alice@130.125.25.244:/home/alice/WW_PopGen/temp.imiss ~/Documents/Postdoc_Bruce/Projects/WW_project/2_Population_structure/0_Nuclear_genome/All_good_samples.miss


#Read genetic distances
ibs = read_tsv(paste0(nuc_PS_dir, "All_good_samples.mibs.id"), col_names = c("Whatever", "ID_file1")) %>% 
  dplyr::select(ID_file1) %>% pull()
related = read_tsv(paste0(nuc_PS_dir, "All_good_samples.mibs"), col_names = ibs) %>%
  mutate(ID_file1 = ibs) %>%
  pivot_longer(names_to = "ID_file2", values_to = "Relatedness", cols = -ID_file1) 
distances = inner_join(related, geo_distances) 

#To do: compare intra-continent and inter-continent
#To do: compare intra-cluster and inter-cluster
#
distances = distances %>% 
  inner_join(Zt_meta %>% dplyr::select(ID_file1 = ID_file, Continent1 = Continent)) %>% 
  inner_join(Zt_meta %>% dplyr::select(ID_file2 = ID_file, Continent2 = Continent)) %>%
  mutate(Continent_comparison = ifelse(Continent1 == Continent2, "Intra", "Inter")) %>% 
  filter(Continent1 != "Asia") %>%
  filter(ID_file1 != ID_file2) 


#Dot plots
distances  %>%
  filter(Continent_comparison == "Intra") %>%
  ggplot(aes(x = Geo_distance, y = Relatedness)) + 
  geom_point(aes(col = Continent1), shape = 1, alpha = 0.9) + 
  theme_bw() +
  facet_wrap(vars(Continent1), scales = "free") +
  Color_Continent + 
  stat_smooth(col = "grey20", method = "lm") +
   stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")),label.y = 1) +
   stat_regline_equation(label.y = 1.015)

# Discretized distances
temp = distances %>%
  filter(Continent_comparison == "Intra") %>%
  filter(Geo_distance < 4000000) %>%
  mutate(Geo_distance_disc = as.factor(500*round(Geo_distance/500000))) %>%
  dplyr::count(Continent1, Geo_distance_disc) #%>%
#filter(n >= 100)

distances %>%
  filter(Continent_comparison == "Intra") %>%
  mutate(Geo_distance_disc = as.factor(500*round(Geo_distance/500000)))%>%
 # inner_join(temp) %>%
  filter(Geo_distance < 4000000) %>%
  ggplot(aes(x = Geo_distance_disc, y = Relatedness, col = Continent1)) +
  geom_boxplot(outlier.shape = 1) + 
  theme_bw() +
  facet_wrap(vars(Continent1), scales = "free_y") +
  Color_Continent  +
  geom_text(data = temp, aes(x = Geo_distance_disc, y = 1, label = n), 
            col = "black", angle = 90, size = 4) +
  theme(axis.text.x = element_text(angle = 45, hjust =1, vjust = 1))

#Both 
temp = distances %>%
  filter(Continent_comparison == "Intra") %>%
  #filter(Geo_distance < 4000000) %>%
  mutate(Geo_distance_disc = as.factor(500000*round(Geo_distance/500000))) 

temp = distances %>%
  filter(Continent_comparison == "Intra") %>%
  #filter(Geo_distance < 4000000) %>%
  mutate(Geo_distance_disc = cut_width(Geo_distance, width = 500000)) 
ggplot() +
  geom_boxplot(data = temp, aes(x = Geo_distance_disc, y = Relatedness), outlier.shape = NA)  + 
  theme_bw() +
  facet_wrap(vars(Continent1), scales = "free")

ggplot(data = filter(distances, Continent_comparison == "Intra"), 
             aes(x = Geo_distance, y = Relatedness, col = Continent1)) + 
  geom_point(shape = 19, alpha = 0.05) +
  theme_bw() +
  facet_wrap(vars(Continent1)) +
  Color_Continent + 
  stat_smooth(col = "grey20", method = "lm") +
   stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")),label.y = 1) +
   stat_regline_equation(label.y = 1.015)

As most continents contain several genetic clusters, it is possible that the effect would be at least partially driven by the higher proportion of isolates from different genetic clusters in the higher geographically distances. So here, I look at isolation-by-distance but this time per cluster instead of per continent.

temp = list()
for (i in c(1:chosen_K)){
  temp[[i]] = read_tsv(paste0(PopStr_dir, "Sample_list_V", i, ".args"), col_names = "ID_file") %>%
    mutate( Cluster  = paste0("V", i)) }

list_pure_cluster = bind_rows(temp)

distances = distances %>% 
  inner_join(list_pure_cluster %>% dplyr::select(ID_file1 = ID_file, Cluster1 = Cluster)) %>% 
  inner_join(list_pure_cluster %>% dplyr::select(ID_file2 = ID_file, Cluster2 = Cluster)) %>%
  mutate(Cluster_comparison = ifelse(Cluster1 == Cluster2, "Intra", "Inter")) 




distances %>%
  filter(Cluster_comparison == "Intra") %>%
 # inner_join(temp) %>%
#  filter(Geo_distance < 4000000) %>%
  ggplot(aes(x = Geo_distance, y = Relatedness, col = Cluster1)) +
  geom_point() + 
  theme_bw() +
  facet_wrap(vars(Cluster1), scales = "free")  +
  theme(axis.text.x = element_text(angle = 45, hjust =1, vjust = 1))+ 
  stat_smooth(col = "grey20", method = "lm") +
   stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")),label.y = 1) +
   stat_regline_equation(label.y = 1.015)+
  Color_Cluster

# Discretized distances
temp = distances %>%
  filter(Cluster_comparison == "Intra") %>%
  filter(Geo_distance < 4000000) %>%
  mutate(Geo_distance_disc = as.factor(500*round(Geo_distance/500000))) %>%
  dplyr::count(Cluster1, Geo_distance_disc) #%>%
#filter(n >= 100)

distances %>%
  filter(Cluster_comparison == "Intra") %>%
  mutate(Geo_distance_disc = as.factor(500*round(Geo_distance/500000)))%>%
  #filter(Geo_distance < 4000000) %>%
  ggplot(aes(x = Geo_distance_disc, y = Relatedness, col = Cluster1)) +
  geom_boxplot(outlier.shape = 1) + 
  theme_bw() +
  facet_wrap(vars(Cluster1), scales = "free_y") +
  Color_Cluster  +
  geom_text(data = temp, aes(x = Geo_distance_disc, y = 1, label = n), 
            col = "black", angle = 90, size = 4) +
  theme(axis.text.x = element_text(angle = 45, hjust =1, vjust = 1))